OSDN Git Service

First simplest 3D mipmap implementation
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2012 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "Renderer.hpp"
13
14 #include "Clipper.hpp"
15 #include "Math.hpp"
16 #include "FrameBuffer.hpp"
17 #include "Timer.hpp"
18 #include "Surface.hpp"
19 #include "Half.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
24 #include "CPUID.hpp"
25 #include "Memory.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
28 #include "Debug.hpp"
29 #include "Reactor/Reactor.hpp"
30
31 #include <malloc.h>
32
33 #undef max
34
35 bool disableServer = true;   // Passed to the SwiftConfig constructor in Renderer::Renderer()
36
37 #ifndef NDEBUG
38 unsigned int minPrimitives = 1;   // Debug builds only: Renderer::draw() returns early when count is below this
39 unsigned int maxPrimitives = 1 << 21;   // Debug builds only: Renderer::draw() returns early when count is above this
40 #endif
41
42 namespace sw
43 {
44         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
45         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
46         extern bool booleanFaceRegister;   // Overwritten by the Renderer constructor
47         extern bool fullPixelPositionRegister;   // Overwritten by the Renderer constructor
48
49         extern bool forceWindowed;
50         extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near as the near value
51         extern bool postBlendSRGB;
52         extern bool exactColorRounding;   // Overwritten by the Renderer constructor
53         extern TransparencyAntialiasing transparencyAntialiasing;
54         extern bool forceClearRegisters;
55
56         extern bool precacheVertex;
57         extern bool precacheSetup;
58         extern bool precachePixel;
59
60         int batchSize = 128;   // Renderer::draw() divides this by the multisample count to get the primitives per batch
61         int threadCount = 1;
62         int unitCount = 1;   // Upper bound of the primitive unit loops in Renderer::findAvailableTasks()
63         int clusterCount = 1;   // Upper bound of the pixel cluster loops in Renderer::draw() and Renderer::findAvailableTasks()
64
65         TranscendentalPrecision logPrecision = ACCURATE;   // Renderer::threadFunction() enables flush-to-zero and denormals-are-zero when this is below IEEE
66         TranscendentalPrecision expPrecision = ACCURATE;
67         TranscendentalPrecision rcpPrecision = ACCURATE;
68         TranscendentalPrecision rsqPrecision = ACCURATE;
69         bool perspectiveCorrection = true;
70
71         struct Parameters   // Argument record unpacked by Renderer::threadFunction()
72         {
73                 Renderer *renderer;   // Renderer whose threadLoop() is run
74                 int threadIndex;   // Index passed on to threadLoop()
75         };
76
77         DrawCall::DrawCall()
78         {
79                 queries = 0;
80
81                 vsDirtyConstF = 256 + 1;
82                 vsDirtyConstI = 16;
83                 vsDirtyConstB = 16;
84
85                 psDirtyConstF = 224;
86                 psDirtyConstI = 16;
87                 psDirtyConstB = 16;
88
89                 references = -1;
90
91                 data = (DrawData*)allocate(sizeof(DrawData));
92                 data->constants = &constants;
93         }
94
95         DrawCall::~DrawCall()   // Frees the query list, if any, and the DrawData block obtained with allocate() in the constructor
96         {
97                 delete queries;
98
99                 deallocate(data);
100         }
101
102         Renderer::Renderer(Context *context, bool halfIntegerCoordinates, bool symmetricNormalizedDepth, bool booleanFaceRegister, bool fullPixelPositionRegister, bool exactColorRounding) : context(context), VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), viewport()
103         {
104                 sw::halfIntegerCoordinates = halfIntegerCoordinates;
105                 sw::symmetricNormalizedDepth = symmetricNormalizedDepth;
106                 sw::booleanFaceRegister = booleanFaceRegister;
107                 sw::fullPixelPositionRegister = fullPixelPositionRegister;
108                 sw::exactColorRounding = exactColorRounding;
109
110                 setRenderTarget(0, 0);
111                 clipper = new Clipper();
112
113                 updateViewMatrix = true;
114                 updateBaseMatrix = true;
115                 updateProjectionMatrix = true;
116                 updateClipPlanes = true;
117
118                 #if PERF_HUD
119                         resetTimers();
120                 #endif
121
122                 for(int i = 0; i < 16; i++)
123                 {
124                         vertexTask[i] = 0;
125
126                         worker[i] = 0;
127                         resume[i] = 0;
128                         suspend[i] = 0;
129                 }
130
131                 threadsAwake = 0;
132                 resumeApp = new Event();
133
134                 currentDraw = 0;
135                 nextDraw = 0;
136
137                 qHead = 0;
138                 qSize = 0;
139
140                 for(int i = 0; i < 16; i++)
141                 {
142                         triangleBatch[i] = 0;
143                         primitiveBatch[i] = 0;
144                 }
145
146                 for(int draw = 0; draw < DRAW_COUNT; draw++)
147                 {
148                         drawCall[draw] = new DrawCall();
149                         drawList[draw] = drawCall[draw];
150                 }
151
152                 for(int unit = 0; unit < 16; unit++)
153                 {
154                         primitiveProgress[unit].init();
155                 }
156
157                 for(int cluster = 0; cluster < 16; cluster++)
158                 {
159                         pixelProgress[cluster].init();
160                 }
161
162                 clipFlags = 0;
163
164                 swiftConfig = new SwiftConfig(disableServer);
165                 updateConfiguration(true);
166
167                 sync = new Resource(0);
168         }
169
170         Renderer::~Renderer()   // Releases the objects created by the constructor
171         {
172                 sync->destruct();   // The Resource is released through destruct(); it is not deleted here
173
174                 delete clipper;
175                 clipper = 0;
176                 // The worker threads are terminated before the resume event and the draw calls are freed
177                 terminateThreads();
178                 delete resumeApp;
179
180                 for(int draw = 0; draw < DRAW_COUNT; draw++)
181                 {
182                         delete drawCall[draw];
183                 }
184
185                 delete swiftConfig;
186         }
187
188         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)   // Forwards all arguments unchanged to blitter.blit()
189         {
190                 blitter.blit(source, sRect, dest, dRect, filter);
191         }
192
193         void Renderer::blit3D(Surface *source, Surface *dest)   // Forwards both surfaces unchanged to blitter.blit3D()
194         {
195                 blitter.blit3D(source, dest);
196         }
197
198         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
199         {
200                 #ifndef NDEBUG
201                         if(count < minPrimitives || count > maxPrimitives)
202                         {
203                                 return;
204                         }
205                 #endif
206
207                 context->drawType = drawType;
208
209                 updateConfiguration();
210                 updateClipper();
211
212                 int ss = context->getSuperSampleCount();
213                 int ms = context->getMultiSampleCount();
214
215                 for(int q = 0; q < ss; q++)
216                 {
217                         int oldMultiSampleMask = context->multiSampleMask;
218                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
219
220                         if(!context->multiSampleMask)
221                         {
222                                 continue;
223                         }
224
225                         sync->lock(sw::PRIVATE);
226
227                         Routine *vertexRoutine;
228                         Routine *setupRoutine;
229                         Routine *pixelRoutine;
230
231                         if(update || oldMultiSampleMask != context->multiSampleMask)
232                         {
233                                 vertexState = VertexProcessor::update();
234                                 setupState = SetupProcessor::update();
235                                 pixelState = PixelProcessor::update();
236
237                                 vertexRoutine = VertexProcessor::routine(vertexState);
238                                 setupRoutine = SetupProcessor::routine(setupState);
239                                 pixelRoutine = PixelProcessor::routine(pixelState);
240                         }
241
242                         int batch = batchSize / ms;
243
244                         int (*setupPrimitives)(Renderer *renderer, int batch, int count);
245
246                         if(context->isDrawTriangle())
247                         {
248                                 switch(context->fillMode)
249                                 {
250                                 case FILL_SOLID:
251                                         setupPrimitives = setupSolidTriangles;
252                                         break;
253                                 case FILL_WIREFRAME:
254                                         setupPrimitives = setupWireframeTriangle;
255                                         batch = 1;
256                                         break;
257                                 case FILL_VERTEX:
258                                         setupPrimitives = setupVertexTriangle;
259                                         batch = 1;
260                                         break;
261                                 default: ASSERT(false);
262                                 }
263                         }
264                         else if(context->isDrawLine())
265                         {
266                                 setupPrimitives = setupLines;
267                         }
268                         else   // Point draw
269                         {
270                                 setupPrimitives = setupPoints;
271                         }
272
273                         DrawCall *draw = 0;
274
275                         do
276                         {
277                                 for(int i = 0; i < DRAW_COUNT; i++)
278                                 {
279                                         if(drawCall[i]->references == -1)
280                                         {
281                                                 draw = drawCall[i];
282                                                 drawList[nextDraw % DRAW_COUNT] = draw;
283
284                                                 break;
285                                         }
286                                 }
287
288                                 if(!draw)
289                                 {
290                                         resumeApp->wait();
291                                 }
292                         }
293                         while(!draw);
294
295                         DrawData *data = draw->data;
296
297                         if(queries.size() != 0)
298                         {
299                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
300                                 {
301                                         atomicIncrement(&(*query)->reference);
302                                 }
303
304                                 draw->queries = new std::list<Query*>(queries);
305                         }
306
307                         draw->drawType = drawType;
308                         draw->batchSize = batch;
309
310                         vertexRoutine->bind();
311                         setupRoutine->bind();
312                         pixelRoutine->bind();
313
314                         draw->vertexRoutine = vertexRoutine;
315                         draw->setupRoutine = setupRoutine;
316                         draw->pixelRoutine = pixelRoutine;
317                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();;
318                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
319                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
320                         draw->setupPrimitives = setupPrimitives;
321                         draw->setupState = setupState;
322
323                         for(int i = 0; i < 16; i++)
324                         {
325                                 draw->vertexStream[i] = context->input[i].resource;
326                                 data->input[i] = context->input[i].buffer;
327                                 data->stride[i] = context->input[i].stride;
328
329                                 if(draw->vertexStream[i])
330                                 {
331                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
332                                 }
333                         }
334
335                         if(context->indexBuffer)
336                         {
337                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
338                         }
339
340                         draw->indexBuffer = context->indexBuffer;
341
342                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
343                         {
344                                 draw->texture[sampler] = 0;
345                         }
346
347                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
348                         {
349                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
350                                 {
351                                         draw->texture[sampler] = context->texture[sampler];
352                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
353
354                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
355                                 }
356                         }
357
358                         if(context->pixelShader)
359                         {
360                                 if(draw->psDirtyConstF)
361                                 {
362                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
363                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
364                                         draw->psDirtyConstF = 0;
365                                 }
366
367                                 if(draw->psDirtyConstI)
368                                 {
369                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
370                                         draw->psDirtyConstI = 0;
371                                 }
372
373                                 if(draw->psDirtyConstB)
374                                 {
375                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
376                                         draw->psDirtyConstB = 0;
377                                 }
378                         }
379                         
380                         if(context->pixelShaderVersion() <= 0x0104)
381                         {
382                                 for(int stage = 0; stage < 8; stage++)
383                                 {
384                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
385                                         {
386                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
387                                         }
388                                         else break;
389                                 }
390                         }
391
392                         if(context->vertexShader)
393                         {
394                                 if(context->vertexShader->getVersion() >= 0x0300)
395                                 {
396                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
397                                         {
398                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
399                                                 {
400                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
401                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
402
403                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
404                                                 }
405                                         }
406                                 }
407
408                                 if(draw->vsDirtyConstF)
409                                 {
410                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
411                                         draw->vsDirtyConstF = 0;
412                                 }
413
414                                 if(draw->vsDirtyConstI)
415                                 {
416                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
417                                         draw->vsDirtyConstI = 0;
418                                 }
419
420                                 if(draw->vsDirtyConstB)
421                                 {
422                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
423                                         draw->vsDirtyConstB = 0;
424                                 }
425                         }
426                         else
427                         {
428                                 data->ff = ff;
429
430                                 draw->vsDirtyConstF = 256 + 1;
431                                 draw->vsDirtyConstI = 16;
432                                 draw->vsDirtyConstB = 16;
433                         }
434
435                         if(pixelState.stencilActive)
436                         {
437                                 data->stencil[0] = stencil;
438                                 data->stencil[1] = stencilCCW;
439                         }
440
441                         if(pixelState.fogActive)
442                         {
443                                 data->fog = fog;
444                         }
445
446                         if(setupState.isDrawPoint)
447                         {
448                                 data->point = point;
449                         }
450
451                         data->lineWidth = context->lineWidth;
452
453                         data->factor = factor;
454
455                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
456                         {
457                                 float ref = (float)context->alphaReference * (1.0f / 255.0f);
458                                 float margin = sw::min(ref, 1.0f - ref);
459
460                                 if(ms == 4)
461                                 {
462                                         data->a2c0 = replicate(ref - margin * 0.6f);
463                                         data->a2c1 = replicate(ref - margin * 0.2f);
464                                         data->a2c2 = replicate(ref + margin * 0.2f);
465                                         data->a2c3 = replicate(ref + margin * 0.6f);
466                                 }
467                                 else if(ms == 2)
468                                 {
469                                         data->a2c0 = replicate(ref - margin * 0.3f);
470                                         data->a2c1 = replicate(ref + margin * 0.3f);
471                                 }
472                                 else ASSERT(false);
473                         }
474
475                         if(pixelState.occlusionEnabled)
476                         {
477                                 for(int cluster = 0; cluster < clusterCount; cluster++)
478                                 {
479                                         data->occlusion[cluster] = 0;
480                                 }
481                         }
482
483                         #if PERF_PROFILE
484                                 for(int cluster = 0; cluster < clusterCount; cluster++)
485                                 {
486                                         for(int i = 0; i < PERF_TIMERS; i++)
487                                         {
488                                                 data->cycles[i][cluster] = 0;
489                                         }
490                                 }
491                         #endif
492
493                         // Viewport
494                         {
495                                 float W = 0.5f * viewport.width;
496                                 float H = 0.5f * viewport.height;
497                                 float X0 = viewport.x0 + W;
498                                 float Y0 = viewport.y0 + H;
499                                 float N = viewport.minZ;
500                                 float F = viewport.maxZ;
501                                 float Z = F - N;
502
503                                 if(context->isDrawTriangle(false))
504                                 {
505                                         N += depthBias;
506                                 }
507
508                                 if(complementaryDepthBuffer)
509                                 {
510                                         Z = -Z;
511                                         N = 1 - N;
512                                 }
513
514                                 static const float X[5][16] =   // Fragment offsets
515                                 {
516                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
517                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
518                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
519                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
520                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
521                                 };
522
523                                 static const float Y[5][16] =   // Fragment offsets
524                                 {
525                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
526                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
527                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
528                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
529                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
530                                 };
531
532                                 int s = sw::log2(ss);
533
534                                 data->Wx16 = replicate(W * 16);
535                                 data->Hx16 = replicate(H * 16);
536                                 data->X0x16 = replicate(X0 * 16);
537                                 data->Y0x16 = replicate(Y0 * 16);
538                                 data->XXXX = replicate(X[s][q] / W);
539                                 data->YYYY = replicate(Y[s][q] / H);
540                                 data->halfPixelX = replicate(0.5f / W);
541                                 data->halfPixelY = replicate(0.5f / H);
542                                 data->viewportHeight = abs(viewport.height);
543                                 data->slopeDepthBias = slopeDepthBias;
544                                 data->depthRange = Z;
545                                 data->depthNear = N;
546                                 draw->clipFlags = clipFlags;
547
548                                 if(clipFlags)
549                                 {
550                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
551                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
552                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
553                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
554                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
555                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
556                                 }
557                         }
558
559                         // Target
560                         {
561                                 for(int index = 0; index < 4; index++)
562                                 {
563                                         draw->renderTarget[index] = context->renderTarget[index];
564
565                                         if(draw->renderTarget[index])
566                                         {
567                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
568                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
569                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
570                                         }
571                                 }
572
573                                 draw->depthStencil = context->depthStencil;
574
575                                 if(draw->depthStencil)
576                                 {
577                                         data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
578                                         data->depthPitchB = context->depthStencil->getInternalPitchB();
579                                         data->depthSliceB = context->depthStencil->getInternalSliceB();
580
581                                         data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
582                                         data->stencilPitchB = context->depthStencil->getStencilPitchB();
583                                         data->stencilSliceB = context->depthStencil->getStencilSliceB();
584                                 }
585                         }
586
587                         // Scissor
588                         {
589                                 data->scissorX0 = scissor.x0;
590                                 data->scissorX1 = scissor.x1;
591                                 data->scissorY0 = scissor.y0;
592                                 data->scissorY1 = scissor.y1;
593                         }
594
595                         draw->primitive = 0;
596                         draw->count = count;
597
598                         draw->references = (count + batch - 1) / batch;
599
600                         schedulerMutex.lock();
601                         nextDraw++;
602                         schedulerMutex.unlock();
603
604                         if(!threadsAwake)
605                         {
606                                 suspend[0]->wait();
607
608                                 threadsAwake = 1;
609                                 task[0].type = Task::RESUME;
610
611                                 resume[0]->signal();
612                         }
613                 }
614         }
615
616         void Renderer::threadFunction(void *parameters)
617         {
618                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
619                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
620
621                 if(logPrecision < IEEE)
622                 {
623                         CPUID::setFlushToZero(true);
624                         CPUID::setDenormalsAreZero(true);
625                 }
626
627                 renderer->threadLoop(threadIndex);
628         }
629
630         void Renderer::threadLoop(int threadIndex)
631         {
632                 while(!exitThreads)
633                 {
634                         taskLoop(threadIndex);
635
636                         suspend[threadIndex]->signal();
637                         resume[threadIndex]->wait();
638                 }
639         }
640
641         void Renderer::taskLoop(int threadIndex)
642         {
643                 while(task[threadIndex].type != Task::SUSPEND)
644                 {
645                         scheduleTask(threadIndex);
646                         executeTask(threadIndex);
647                 }
648         }
649
650         void Renderer::findAvailableTasks()   // Refill taskQueue with runnable pixel and primitive tasks
651         {
                    // Appends tasks at taskQueue[qHead] and increments qSize. scheduleTask() calls this
                    // while holding schedulerMutex.
                    //
                    // primitiveProgress[unit].references encodes the state of a primitive unit:
                    //     0   free; may be handed a new batch of primitives
                    //    -1   a PRIMITIVES task has been queued for it (set below)
                    //    >0   batch processed; executeTask() sets it to clusterCount and
                    //         finishRendering() decrements it once per pixel task
                    //
                    // NOTE(review): qHead wraps at a hard-coded 32 and nothing here checks for a full
                    // queue; presumably taskQueue has 32 entries and the unit/cluster counts keep qSize
                    // below that - confirm.

652                 // Find pixel tasks
653                 for(int cluster = 0; cluster < clusterCount; cluster++)
654                 {
655                         if(!pixelProgress[cluster].executing)
656                         {
                                    // Look for the batch this cluster has to render next
657                                 for(int unit = 0; unit < unitCount; unit++)
658                                 {
659                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
660                                         {
661                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
662                                                 {
663                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
664                                                         {
665                                                                 Task &task = taskQueue[qHead];
666                                                                 task.type = Task::PIXELS;
667                                                                 task.primitiveUnit = unit;
668                                                                 task.pixelCluster = cluster;
669
                                                                    // Cleared again at the end of finishRendering()
670                                                                 pixelProgress[cluster].executing = true;
671
672                                                                 // Commit to the task queue
673                                                                 qHead = (qHead + 1) % 32;
674                                                                 qSize++;
675
                                                                    // At most one pixel task per cluster per call
676                                                                 break;
677                                                         }
678                                                 }
679                                         }
680                                 }
681                         }
682                 }
683         
684                 // Find primitive tasks
685                 if(currentDraw == nextDraw)
686                 {
687                         return;   // No more primitives to process
688                 }
689
690                 for(int unit = 0; unit < unitCount; unit++)
691                 {
692                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
693
                            // All primitives of the current draw call have been handed out: move on to the next one
694                         if(draw->primitive >= draw->count)
695                         {
696                                 currentDraw++;
697
698                                 if(currentDraw == nextDraw)
699                                 {
700                                         return;   // No more primitives to process
701                                 }
702
703                                 draw = drawList[currentDraw % DRAW_COUNT];
704                         }
705
706                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
707                         {
708                                 int primitive = draw->primitive;
709                                 int count = draw->count;
710                                 int batch = draw->batchSize;
711
                                    // Hand this unit the next batch: a full batchSize, or whatever remains of the draw call
712                                 primitiveProgress[unit].drawCall = currentDraw;
713                                 primitiveProgress[unit].firstPrimitive = primitive;
714                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
715
                                    // May step past draw->count on the last batch; the >= test above handles that
716                                 draw->primitive += batch;
717
718                                 Task &task = taskQueue[qHead];
719                                 task.type = Task::PRIMITIVES;
720                                 task.primitiveUnit = unit;
721
                                    // Neither free (0) nor ready for pixel processing (> 0) until executeTask() finishes the batch
722                                 primitiveProgress[unit].references = -1;
723
724                                 // Commit to the task queue
725                                 qHead = (qHead + 1) % 32;
726                                 qSize++;
727                         }
728                 }
729         }
730
731         void Renderer::scheduleTask(int threadIndex)   // Pick the next task for thread threadIndex, or mark it for suspension
732         {
                    // The whole function runs under schedulerMutex. On return task[threadIndex] holds
                    // either a task taken from taskQueue or the Task::SUSPEND marker.
733                 schedulerMutex.lock();
734
                    // Fewer queued tasks than sleeping threads (threadCount - threadsAwake) plus this one:
                    // look for more work before dequeuing.
735                 if((int)qSize < threadCount - threadsAwake + 1)
736                 {
737                         findAvailableTasks();
738                 }
739
740                 if(qSize != 0)
741                 {
                            // Take the oldest entry, which sits qSize slots behind qHead in the 32-entry ring.
                            // NOTE(review): when qHead < qSize this relies on unsigned wrap-around of
                            // (qHead - qSize); the (int) cast above suggests qSize is unsigned - confirm.
742                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
743                         qSize--;
744
745                         if(threadsAwake != threadCount)
746                         {
                                    // Number of sleeping threads to wake for the tasks still queued
747                                 int wakeup = qSize - threadsAwake + 1;
748
749                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
750                                 {
751                                         if(task[i].type == Task::SUSPEND)
752                                         {
                                                    // Wait for thread i to signal suspend[i] in threadLoop() before
                                                    // changing its task type, then release it from resume[i]->wait()
753                                                 suspend[i]->wait();
754                                                 task[i].type = Task::RESUME;
755                                                 resume[i]->signal();
756
757                                                 threadsAwake++;
758                                                 wakeup--;
759                                         }
760                                 }
761                         }
762                 }
763                 else
764                 {
                            // Nothing to do: taskLoop() exits on this marker and threadLoop() parks the thread
765                         task[threadIndex].type = Task::SUSPEND;
766
767                         threadsAwake--;
768                 }
769
770                 schedulerMutex.unlock();
771         }
772
773         void Renderer::executeTask(int threadIndex)   // Run the task currently stored in task[threadIndex]
774         {
                    // PRIMITIVES: vertex processing plus primitive setup for one batch.
                    // PIXELS:     pixel processing of one batch for one cluster, followed by finishRendering().
                    // RESUME and SUSPEND carry no work. With PERF_HUD enabled, the time spent in each
                    // stage is accumulated per thread.
775                 #if PERF_HUD
776                         int64_t startTick = Timer::ticks();
777                 #endif
778
779                 switch(task[threadIndex].type)
780                 {
781                 case Task::PRIMITIVES:
782                         {
783                                 int unit = task[threadIndex].primitiveUnit;
784                                 
                                    // Batch range and draw call assigned to this unit by findAvailableTasks()
785                                 int input = primitiveProgress[unit].firstPrimitive;
786                                 int count = primitiveProgress[unit].primitiveCount;
787                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
788                                 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
789
790                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
791
792                                 #if PERF_HUD
793                                         int64_t time = Timer::ticks();
794                                         vertexTime[threadIndex] += time - startTick;
795                                         startTick = time;
796                                 #endif
797
                                    // The primitive unit index is passed as the 'batch' argument
798                                 int visible = setupPrimitives(this, unit, count);
799
                                    // 'visible' is stored before 'references' becomes positive; a positive count is what
                                    // makes findAvailableTasks() hand the batch to pixel clusters (one reference each)
800                                 primitiveProgress[unit].visible = visible;
801                                 primitiveProgress[unit].references = clusterCount;
802
803                                 #if PERF_HUD
804                                         setupTime[threadIndex] += Timer::ticks() - startTick;
805                                 #endif
806                         }
807                         break;
808                 case Task::PIXELS:
809                         {
810                                 int unit = task[threadIndex].primitiveUnit;
811                                 int visible = primitiveProgress[unit].visible;
812
                                    // Skip the pixel routine when setup reported nothing visible
813                                 if(visible > 0)
814                                 {
815                                         int cluster = task[threadIndex].pixelCluster;
816                                         Primitive *primitive = primitiveBatch[unit];
817                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
818                                         DrawData *data = draw->data;
819                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
820
821                                         pixelRoutine(primitive, visible, cluster, data);
822                                 }
823
                                    // Always called, even for empty batches, so progress counters and references advance
824                                 finishRendering(task[threadIndex]);
825
826                                 #if PERF_HUD
827                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
828                                 #endif
829                         }
830                         break;
831                 case Task::RESUME:
832                         break;
833                 case Task::SUSPEND:
834                         break;
835                 default:
836                         ASSERT(false);
837                 }
838         }
839
840         void Renderer::synchronize()   // Acquire and immediately release the 'sync' resource
841         {
                    // finishRendering() calls sync->unlock() when a draw call completes.
                    // NOTE(review): presumably the lock below blocks until in-flight draw calls have
                    // released 'sync', making this a wait-for-idle - confirm against Resource::lock().
842                 sync->lock(sw::PUBLIC);
843                 sync->unlock();
844         }
845
846         void Renderer::finishRendering(Task &pixelTask)   // Bookkeeping after a PIXELS task; releases the draw call when it is complete
847         {
                    // Called by executeTask() at the end of every PIXELS task. pixelTask supplies the
                    // primitive unit and pixel cluster that were just processed.
848                 int unit = pixelTask.primitiveUnit;
849                 int cluster = pixelTask.pixelCluster;
850
851                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
852                 DrawData &data = *draw.data;
853                 int primitive = primitiveProgress[unit].firstPrimitive;
854                 int count = primitiveProgress[unit].primitiveCount;
855
                    // This cluster has now rendered everything up to the end of the batch
856                 pixelProgress[cluster].processedPrimitives = primitive + count;
857
                    // Whole draw call rendered by this cluster: move it on to the next draw call
858                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
859                 {
860                         pixelProgress[cluster].drawCall++;
861                         pixelProgress[cluster].processedPrimitives = 0;
862                 }
863
                    // Drop this cluster's reference to the batch.
                    // NOTE(review): assumes atomicDecrement returns the decremented value, so that 0
                    // means the last cluster is done and the unit is free again - confirm.
864                 int ref = atomicDecrement(&primitiveProgress[unit].references);
865
866                 if(ref == 0)
867                 {
868                         ref = atomicDecrement(&draw.references);
869
                            // Last outstanding reference to the draw call: release everything it holds
870                         if(ref == 0)
871                         {
872                                 #if PERF_PROFILE
873                                         for(int cluster = 0; cluster < clusterCount; cluster++)
874                                         {
875                                                 for(int i = 0; i < PERF_TIMERS; i++)
876                                                 {
877                                                         profiler.cycles[i] += data.cycles[i][cluster];
878                                                 }
879                                         }
880                                 #endif
881
882                                 if(draw.queries)
883                                 {
                                            // Add every cluster's occlusion count to each query attached to this draw call
884                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
885                                         {
886                                                 Query *query = *q;
887
888                                                 for(int cluster = 0; cluster < clusterCount; cluster++)
889                                                 {
890                                                         atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
891                                                 }
892
893                                                 atomicDecrement(&query->reference);
894                                         }
895
896                                         delete draw.queries;
897                                         draw.queries = 0;
898                                 }
899
900                                 for(int i = 0; i < 4; i++)
901                                 {
902                                         if(draw.renderTarget[i])
903                                         {
904                                                 draw.renderTarget[i]->unlockInternal();
905                                         }
906                                 }
907
908                                 if(draw.depthStencil)
909                                 {
910                                         draw.depthStencil->unlockInternal();
911                                         draw.depthStencil->unlockStencil();
912                                 }
913
914                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
915                                 {
916                                         if(draw.texture[i])
917                                         {
918                                                 draw.texture[i]->unlock();
919                                         }
920                                 }
921
                                    // NOTE(review): the vertex stream loop is bounded by TEXTURE_IMAGE_UNITS;
                                    // confirm that this matches the number of entries in draw.vertexStream.
922                                 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
923                                 {
924                                         if(draw.vertexStream[i])
925                                         {
926                                                 draw.vertexStream[i]->unlock();
927                                         }
928                                 }
929
930                                 if(draw.indexBuffer)
931                                 {
932                                         draw.indexBuffer->unlock();
933                                 }
934
935                                 draw.vertexRoutine->unbind();
936                                 draw.setupRoutine->unbind();
937                                 draw.pixelRoutine->unbind();
938
939                                 sync->unlock();
940
                                    // NOTE(review): -1 presumably marks this DrawCall slot as free, and resumeApp
                                    // presumably wakes a thread waiting for one - confirm against the code that fills drawList.
941                                 draw.references = -1;
942                                 resumeApp->signal();
943                         }
944                 }
945
                    // Cleared last: findAvailableTasks() only queues a new pixel task for a cluster whose 'executing' flag is false
946                 pixelProgress[cluster].executing = false;
947         }
948
949         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
950         {
951                 Triangle *triangle = triangleBatch[unit];
952                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
953                 DrawData *data = draw->data;
954                 VertexTask *task = vertexTask[thread];
955
956                 const void *indices = data->indices;
957                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
958
959                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
960                 {
961                         task->vertexCache.clear();
962                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
963                 }
964
965                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
966
967                 switch(draw->drawType)
968                 {
969                 case DRAW_POINTLIST:
970                         {
971                                 unsigned int index = start;
972
973                                 for(unsigned int i = 0; i < triangleCount; i++)
974                                 {
975                                         batch[i][0] = index;
976                                         batch[i][1] = index;
977                                         batch[i][2] = index;
978
979                                         index += 1;
980                                 }
981                         }
982                         break;
983                 case DRAW_LINELIST:
984                         {
985                                 unsigned int index = 2 * start;
986
987                                 for(unsigned int i = 0; i < triangleCount; i++)
988                                 {
989                                         batch[i][0] = index + 0;
990                                         batch[i][1] = index + 1;
991                                         batch[i][2] = index + 1;
992
993                                         index += 2;
994                                 }
995                         }
996                         break;
997                 case DRAW_LINESTRIP:
998                         {
999                                 unsigned int index = start;
1000
1001                                 for(unsigned int i = 0; i < triangleCount; i++)
1002                                 {
1003                                         batch[i][0] = index + 0;
1004                                         batch[i][1] = index + 1;
1005                                         batch[i][2] = index + 1;
1006
1007                                         index += 1;
1008                                 }
1009                         }
1010                         break;
1011                 case DRAW_LINELOOP:
1012                         {
1013                                 unsigned int index = start;
1014
1015                                 for(unsigned int i = 0; i < triangleCount; i++)
1016                                 {
1017                                         batch[i][0] = (index + 0) % loop;
1018                                         batch[i][1] = (index + 1) % loop;
1019                                         batch[i][2] = (index + 1) % loop;
1020
1021                                         index += 1;
1022                                 }
1023                         }
1024                         break;
1025                 case DRAW_TRIANGLELIST:
1026                         {
1027                                 unsigned int index = 3 * start;
1028
1029                                 for(unsigned int i = 0; i < triangleCount; i++)
1030                                 {
1031                                         batch[i][0] = index + 0;
1032                                         batch[i][1] = index + 1;
1033                                         batch[i][2] = index + 2;
1034
1035                                         index += 3;
1036                                 }
1037                         }
1038                         break;
1039                 case DRAW_TRIANGLESTRIP:
1040                         {
1041                                 unsigned int index = start;
1042
1043                                 for(unsigned int i = 0; i < triangleCount; i++)
1044                                 {
1045                                         batch[i][0] = index + 0;
1046                                         batch[i][1] = index + (index & 1) + 1;
1047                                         batch[i][2] = index + (~index & 1) + 1;
1048
1049                                         index += 1;
1050                                 }
1051                         }
1052                         break;
1053                 case DRAW_TRIANGLEFAN:
1054                         {
1055                                 unsigned int index = start;
1056
1057                                 for(unsigned int i = 0; i < triangleCount; i++)
1058                                 {
1059                                         batch[i][0] = index + 1;
1060                                         batch[i][1] = index + 2;
1061                                         batch[i][2] = 0;
1062
1063                                         index += 1;
1064                                 }
1065                         }
1066                         break;
1067                 case DRAW_INDEXEDPOINTLIST8:
1068                         {
1069                                 const unsigned char *index = (const unsigned char*)indices + start;
1070
1071                                 for(unsigned int i = 0; i < triangleCount; i++)
1072                                 {
1073                                         batch[i][0] = *index;
1074                                         batch[i][1] = *index;
1075                                         batch[i][2] = *index;
1076
1077                                         index += 1;
1078                                 }
1079                         }
1080                         break;
1081                 case DRAW_INDEXEDPOINTLIST16:
1082                         {
1083                                 const unsigned short *index = (const unsigned short*)indices + start;
1084
1085                                 for(unsigned int i = 0; i < triangleCount; i++)
1086                                 {
1087                                         batch[i][0] = *index;
1088                                         batch[i][1] = *index;
1089                                         batch[i][2] = *index;
1090
1091                                         index += 1;
1092                                 }
1093                         }
1094                         break;
1095                 case DRAW_INDEXEDPOINTLIST32:
1096                         {
1097                                 const unsigned int *index = (const unsigned int*)indices + start;
1098
1099                                 for(unsigned int i = 0; i < triangleCount; i++)
1100                                 {
1101                                         batch[i][0] = *index;
1102                                         batch[i][1] = *index;
1103                                         batch[i][2] = *index;
1104
1105                                         index += 1;
1106                                 }
1107                         }
1108                         break;
1109                 case DRAW_INDEXEDLINELIST8:
1110                         {
1111                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1112
1113                                 for(unsigned int i = 0; i < triangleCount; i++)
1114                                 {
1115                                         batch[i][0] = index[0];
1116                                         batch[i][1] = index[1];
1117                                         batch[i][2] = index[1];
1118
1119                                         index += 2;
1120                                 }
1121                         }
1122                         break;
1123                 case DRAW_INDEXEDLINELIST16:
1124                         {
1125                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1126
1127                                 for(unsigned int i = 0; i < triangleCount; i++)
1128                                 {
1129                                         batch[i][0] = index[0];
1130                                         batch[i][1] = index[1];
1131                                         batch[i][2] = index[1];
1132
1133                                         index += 2;
1134                                 }
1135                         }
1136                         break;
1137                 case DRAW_INDEXEDLINELIST32:
1138                         {
1139                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1140
1141                                 for(unsigned int i = 0; i < triangleCount; i++)
1142                                 {
1143                                         batch[i][0] = index[0];
1144                                         batch[i][1] = index[1];
1145                                         batch[i][2] = index[1];
1146
1147                                         index += 2;
1148                                 }
1149                         }
1150                         break;
1151                 case DRAW_INDEXEDLINESTRIP8:
1152                         {
1153                                 const unsigned char *index = (const unsigned char*)indices + start;
1154
1155                                 for(unsigned int i = 0; i < triangleCount; i++)
1156                                 {
1157                                         batch[i][0] = index[0];
1158                                         batch[i][1] = index[1];
1159                                         batch[i][2] = index[1];
1160
1161                                         index += 1;
1162                                 }
1163                         }
1164                         break;
1165                 case DRAW_INDEXEDLINESTRIP16:
1166                         {
1167                                 const unsigned short *index = (const unsigned short*)indices + start;
1168
1169                                 for(unsigned int i = 0; i < triangleCount; i++)
1170                                 {
1171                                         batch[i][0] = index[0];
1172                                         batch[i][1] = index[1];
1173                                         batch[i][2] = index[1];
1174
1175                                         index += 1;
1176                                 }
1177                         }
1178                         break;
1179                 case DRAW_INDEXEDLINESTRIP32:
1180                         {
1181                                 const unsigned int *index = (const unsigned int*)indices + start;
1182
1183                                 for(unsigned int i = 0; i < triangleCount; i++)
1184                                 {
1185                                         batch[i][0] = index[0];
1186                                         batch[i][1] = index[1];
1187                                         batch[i][2] = index[1];
1188
1189                                         index += 1;
1190                                 }
1191                         }
1192                         break;
1193                 case DRAW_INDEXEDLINELOOP8:
1194                         {
1195                                 const unsigned char *index = (const unsigned char*)indices;
1196
1197                                 for(unsigned int i = 0; i < triangleCount; i++)
1198                                 {
1199                                         batch[i][0] = index[(start + i + 0) % loop];
1200                                         batch[i][1] = index[(start + i + 1) % loop];
1201                                         batch[i][2] = index[(start + i + 1) % loop];
1202                                 }
1203                         }
1204                         break;
1205                 case DRAW_INDEXEDLINELOOP16:
1206                         {
1207                                 const unsigned short *index = (const unsigned short*)indices;
1208
1209                                 for(unsigned int i = 0; i < triangleCount; i++)
1210                                 {
1211                                         batch[i][0] = index[(start + i + 0) % loop];
1212                                         batch[i][1] = index[(start + i + 1) % loop];
1213                                         batch[i][2] = index[(start + i + 1) % loop];
1214                                 }
1215                         }
1216                         break;
1217                 case DRAW_INDEXEDLINELOOP32:
1218                         {
1219                                 const unsigned int *index = (const unsigned int*)indices;
1220
1221                                 for(unsigned int i = 0; i < triangleCount; i++)
1222                                 {
1223                                         batch[i][0] = index[(start + i + 0) % loop];
1224                                         batch[i][1] = index[(start + i + 1) % loop];
1225                                         batch[i][2] = index[(start + i + 1) % loop];
1226                                 }
1227                         }
1228                         break;
1229                 case DRAW_INDEXEDTRIANGLELIST8:
1230                         {
1231                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1232
1233                                 for(unsigned int i = 0; i < triangleCount; i++)
1234                                 {
1235                                         batch[i][0] = index[0];
1236                                         batch[i][1] = index[1];
1237                                         batch[i][2] = index[2];
1238
1239                                         index += 3;
1240                                 }
1241                         }
1242                         break;
1243                 case DRAW_INDEXEDTRIANGLELIST16:
1244                         {
1245                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1246
1247                                 for(unsigned int i = 0; i < triangleCount; i++)
1248                                 {
1249                                         batch[i][0] = index[0];
1250                                         batch[i][1] = index[1];
1251                                         batch[i][2] = index[2];
1252
1253                                         index += 3;
1254                                 }
1255                         }
1256                         break;
1257                 case DRAW_INDEXEDTRIANGLELIST32:
1258                         {
1259                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1260
1261                                 for(unsigned int i = 0; i < triangleCount; i++)
1262                                 {
1263                                         batch[i][0] = index[0];
1264                                         batch[i][1] = index[1];
1265                                         batch[i][2] = index[2];
1266
1267                                         index += 3;
1268                                 }
1269                         }
1270                         break;
1271                 case DRAW_INDEXEDTRIANGLESTRIP8:
1272                         {
1273                                 const unsigned char *index = (const unsigned char*)indices + start;
1274
1275                                 for(unsigned int i = 0; i < triangleCount; i++)
1276                                 {
1277                                         batch[i][0] = index[0];
1278                                         batch[i][1] = index[((start + i) & 1) + 1];
1279                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1280
1281                                         index += 1;
1282                                 }
1283                         }
1284                         break;
1285                 case DRAW_INDEXEDTRIANGLESTRIP16:
1286                         {
1287                                 const unsigned short *index = (const unsigned short*)indices + start;
1288
1289                                 for(unsigned int i = 0; i < triangleCount; i++)
1290                                 {
1291                                         batch[i][0] = index[0];
1292                                         batch[i][1] = index[((start + i) & 1) + 1];
1293                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1294
1295                                         index += 1;
1296                                 }
1297                         }
1298                         break;
1299                 case DRAW_INDEXEDTRIANGLESTRIP32:
1300                         {
1301                                 const unsigned int *index = (const unsigned int*)indices + start;
1302
1303                                 for(unsigned int i = 0; i < triangleCount; i++)
1304                                 {
1305                                         batch[i][0] = index[0];
1306                                         batch[i][1] = index[((start + i) & 1) + 1];
1307                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1308
1309                                         index += 1;
1310                                 }
1311                         }
1312                         break;
1313                 case DRAW_INDEXEDTRIANGLEFAN8:
1314                         {
1315                                 const unsigned char *index = (const unsigned char*)indices;
1316
1317                                 for(unsigned int i = 0; i < triangleCount; i++)
1318                                 {
1319                                         batch[i][0] = index[start + i + 1];
1320                                         batch[i][1] = index[start + i + 2];
1321                                         batch[i][2] = index[0];
1322                                 }
1323                         }
1324                         break;
1325                 case DRAW_INDEXEDTRIANGLEFAN16:
1326                         {
1327                                 const unsigned short *index = (const unsigned short*)indices;
1328
1329                                 for(unsigned int i = 0; i < triangleCount; i++)
1330                                 {
1331                                         batch[i][0] = index[start + i + 1];
1332                                         batch[i][1] = index[start + i + 2];
1333                                         batch[i][2] = index[0];
1334                                 }
1335                         }
1336                         break;
1337                 case DRAW_INDEXEDTRIANGLEFAN32:
1338                         {
1339                                 const unsigned int *index = (const unsigned int*)indices;
1340
1341                                 for(unsigned int i = 0; i < triangleCount; i++)
1342                                 {
1343                                         batch[i][0] = index[start + i + 1];
1344                                         batch[i][1] = index[start + i + 2];
1345                                         batch[i][2] = index[0];
1346                                 }
1347                         }
1348                         break;
1349         case DRAW_QUADLIST:
1350                         {
1351                                 unsigned int index = 4 * start / 2;
1352
1353                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1354                                 {
1355                                         batch[i+0][0] = index + 0;
1356                                         batch[i+0][1] = index + 1;
1357                                         batch[i+0][2] = index + 2;
1358
1359                     batch[i+1][0] = index + 0;
1360                                         batch[i+1][1] = index + 2;
1361                                         batch[i+1][2] = index + 3;
1362
1363                                         index += 4;
1364                                 }
1365                         }
1366                         break;
1367                 default:
1368                         ASSERT(false);
1369                 }
1370
1371                 task->vertexCount = triangleCount * 3;
1372                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1373         }
1374
1375         int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1376         {
1377                 Triangle *triangle = renderer->triangleBatch[unit];
1378                 Primitive *primitive = renderer->primitiveBatch[unit];
1379
1380                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1381                 SetupProcessor::State &state = draw.setupState;
1382                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1383
1384                 int ms = state.multiSample;
1385                 int pos = state.positionRegister;
1386                 const DrawData *data = draw.data;
1387                 int visible = 0;
1388
1389                 for(int i = 0; i < count; i++, triangle++)
1390                 {
1391                         Vertex &v0 = triangle->v0;
1392                         Vertex &v1 = triangle->v1;
1393                         Vertex &v2 = triangle->v2;
1394
1395                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1396                         {
1397                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1398
1399                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1400
1401                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1402                                 {
1403                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1404                                         {
1405                                                 continue;
1406                                         }
1407                                 }
1408
1409                                 if(setupRoutine(primitive, triangle, &polygon, data))
1410                                 {
1411                                         primitive += ms;
1412                                         visible++;
1413                                 }
1414                         }
1415                 }
1416
1417                 return visible;
1418         }
1419
1420         int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1421         {
1422                 Triangle *triangle = renderer->triangleBatch[unit];
1423                 Primitive *primitive = renderer->primitiveBatch[unit];
1424                 int visible = 0;
1425
1426                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1427                 SetupProcessor::State &state = draw.setupState;
1428                 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1429
1430                 const Vertex &v0 = triangle[0].v0;
1431                 const Vertex &v1 = triangle[0].v1;
1432                 const Vertex &v2 = triangle[0].v2;
1433
1434                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1435
1436                 if(state.cullMode == CULL_CLOCKWISE)
1437                 {
1438                         if(d >= 0) return 0;
1439                 }
1440                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1441                 {
1442                         if(d <= 0) return 0;
1443                 }
1444
1445                 // Copy attributes
1446                 triangle[1].v0 = v1;
1447                 triangle[1].v1 = v2;
1448                 triangle[2].v0 = v2;
1449                 triangle[2].v1 = v0;
1450
1451                 if(state.color[0][0].flat)   // FIXME
1452                 {
1453                         for(int i = 0; i < 2; i++)
1454                         {
1455                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1456                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1457                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1458                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1459                         }
1460                 }
1461
1462                 for(int i = 0; i < 3; i++)
1463                 {
1464                         if(setupLine(renderer, *primitive, *triangle, draw))
1465                         {
1466                                 primitive->area = 0.5f * d;
1467
1468                                 primitive++;
1469                                 visible++;
1470                         }
1471
1472                         triangle++;
1473                 }
1474
1475                 return visible;
1476         }
1477         
1478         int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1479         {
1480                 Triangle *triangle = renderer->triangleBatch[unit];
1481                 Primitive *primitive = renderer->primitiveBatch[unit];
1482                 int visible = 0;
1483
1484                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1485                 SetupProcessor::State &state = draw.setupState;
1486
1487                 const Vertex &v0 = triangle[0].v0;
1488                 const Vertex &v1 = triangle[0].v1;
1489                 const Vertex &v2 = triangle[0].v2;
1490
1491                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1492
1493                 if(state.cullMode == CULL_CLOCKWISE)
1494                 {
1495                         if(d >= 0) return 0;
1496                 }
1497                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1498                 {
1499                         if(d <= 0) return 0;
1500                 }
1501
1502                 // Copy attributes
1503                 triangle[1].v0 = v1;
1504                 triangle[2].v0 = v2;
1505
1506                 for(int i = 0; i < 3; i++)
1507                 {
1508                         if(setupPoint(renderer, *primitive, *triangle, draw))
1509                         {
1510                                 primitive->area = 0.5f * d;
1511
1512                                 primitive++;
1513                                 visible++;
1514                         }
1515
1516                         triangle++;
1517                 }
1518
1519                 return visible;
1520         }
1521
1522         int Renderer::setupLines(Renderer *renderer, int unit, int count)
1523         {
1524                 Triangle *triangle = renderer->triangleBatch[unit];
1525                 Primitive *primitive = renderer->primitiveBatch[unit];
1526                 int visible = 0;
1527
1528                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1529                 SetupProcessor::State &state = draw.setupState;
1530
1531                 int ms = state.multiSample;
1532
1533                 for(int i = 0; i < count; i++)
1534                 {
1535                         if(setupLine(renderer, *primitive, *triangle, draw))
1536                         {
1537                                 primitive += ms;
1538                                 visible++;
1539                         }
1540
1541                         triangle++;
1542                 }
1543
1544                 return visible;
1545         }
1546
1547         int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1548         {
1549                 Triangle *triangle = renderer->triangleBatch[unit];
1550                 Primitive *primitive = renderer->primitiveBatch[unit];
1551                 int visible = 0;
1552
1553                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1554                 SetupProcessor::State &state = draw.setupState;
1555
1556                 int ms = state.multiSample;
1557
1558                 for(int i = 0; i < count; i++)
1559                 {
1560                         if(setupPoint(renderer, *primitive, *triangle, draw))
1561                         {
1562                                 primitive += ms;
1563                                 visible++;
1564                         }
1565
1566                         triangle++;
1567                 }
1568
1569                 return visible;
1570         }
1571
1572         bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1573         {
1574                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1575                 const SetupProcessor::State &state = draw.setupState;
1576                 const DrawData &data = *draw.data;
1577
1578                 float lineWidth = data.lineWidth;
1579
1580                 Vertex &v0 = triangle.v0;
1581                 Vertex &v1 = triangle.v1;
1582
1583                 int pos = state.positionRegister;
1584
1585                 const float4 &P0 = v0.v[pos];
1586                 const float4 &P1 = v1.v[pos];
1587
1588                 if(P0.w <= 0 && P1.w <= 0)
1589                 {
1590                         return false;
1591                 }
1592
1593                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1594                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1595
1596                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1597                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1598
1599                 if(dx == 0 && dy == 0)
1600                 {
1601                         return false;
1602                 }
1603
1604                 if(false)   // Rectangle
1605                 {
1606                         float4 P[4];
1607                         int C[4];
1608
1609                         P[0] = P0;
1610                         P[1] = P1;
1611                         P[2] = P1;
1612                         P[3] = P0;
1613
1614                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1615
1616                         dx *= scale;
1617                         dy *= scale;
1618
1619                         float dx0w = dx * P0.w / W;
1620                         float dy0h = dy * P0.w / H;
1621                         float dx0h = dx * P0.w / H;
1622                         float dy0w = dy * P0.w / W;
1623
1624                         float dx1w = dx * P1.w / W;
1625                         float dy1h = dy * P1.w / H;
1626                         float dx1h = dx * P1.w / H;
1627                         float dy1w = dy * P1.w / W;
1628
1629                         P[0].x += -dy0w + -dx0w;
1630                         P[0].y += -dx0h + +dy0h;
1631                         C[0] = computeClipFlags(P[0], data);
1632
1633                         P[1].x += -dy1w + +dx1w;
1634                         P[1].y += -dx1h + +dy1h;
1635                         C[1] = computeClipFlags(P[1], data);
1636
1637                         P[2].x += +dy1w + +dx1w;
1638                         P[2].y += +dx1h + -dy1h;
1639                         C[2] = computeClipFlags(P[2], data);
1640
1641                         P[3].x += +dy0w + -dx0w;
1642                         P[3].y += +dx0h + +dy0h;
1643                         C[3] = computeClipFlags(P[3], data);
1644
1645                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1646                         {
1647                                 Polygon polygon(P, 4);
1648
1649                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1650
1651                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1652                                 {
1653                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1654                                         {
1655                                                 return false;
1656                                         }
1657                                 }
1658
1659                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1660                         }
1661                 }
1662                 else   // Diamond test convention
1663                 {
1664                         float4 P[8];
1665                         int C[8];
1666
1667                         P[0] = P0;
1668                         P[1] = P0;
1669                         P[2] = P0;
1670                         P[3] = P0;
1671                         P[4] = P1;
1672                         P[5] = P1;
1673                         P[6] = P1;
1674                         P[7] = P1;
1675
1676                         float dx0 = lineWidth * 0.5f * P0.w / W;
1677                         float dy0 = lineWidth * 0.5f * P0.w / H;
1678
1679                         float dx1 = lineWidth * 0.5f * P1.w / W;
1680                         float dy1 = lineWidth * 0.5f * P1.w / H;
1681
1682                         P[0].x += -dx0;
1683                         C[0] = computeClipFlags(P[0], data);
1684
1685                         P[1].y += +dy0;
1686                         C[1] = computeClipFlags(P[1], data);
1687
1688                         P[2].x += +dx0;
1689                         C[2] = computeClipFlags(P[2], data);
1690
1691                         P[3].y += -dy0;
1692                         C[3] = computeClipFlags(P[3], data);
1693
1694                         P[4].x += -dx1;
1695                         C[4] = computeClipFlags(P[4], data);
1696
1697                         P[5].y += +dy1;
1698                         C[5] = computeClipFlags(P[5], data);
1699
1700                         P[6].x += +dx1;
1701                         C[6] = computeClipFlags(P[6], data);
1702
1703                         P[7].y += -dy1;
1704                         C[7] = computeClipFlags(P[7], data);
1705
1706                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1707                         {
1708                                 float4 L[6];
1709
1710                                 if(dx > -dy)
1711                                 {
1712                                         if(dx > dy)   // Right
1713                                         {
1714                                                 L[0] = P[0];
1715                                                 L[1] = P[1];
1716                                                 L[2] = P[5];
1717                                                 L[3] = P[6];
1718                                                 L[4] = P[7];
1719                                                 L[5] = P[3];
1720                                         }
1721                                         else   // Down
1722                                         {
1723                                                 L[0] = P[0];
1724                                                 L[1] = P[4];
1725                                                 L[2] = P[5];
1726                                                 L[3] = P[6];
1727                                                 L[4] = P[2];
1728                                                 L[5] = P[3];
1729                                         }
1730                                 }
1731                                 else
1732                                 {
1733                                         if(dx > dy)   // Up
1734                                         {
1735                                                 L[0] = P[0];
1736                                                 L[1] = P[1];
1737                                                 L[2] = P[2];
1738                                                 L[3] = P[6];
1739                                                 L[4] = P[7];
1740                                                 L[5] = P[4];
1741                                         }
1742                                         else   // Left
1743                                         {
1744                                                 L[0] = P[1];
1745                                                 L[1] = P[2];
1746                                                 L[2] = P[3];
1747                                                 L[3] = P[7];
1748                                                 L[4] = P[4];
1749                                                 L[5] = P[5];
1750                                         }
1751                                 }
1752
1753                                 Polygon polygon(L, 6);
1754
1755                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1756
1757                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1758                                 {
1759                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1760                                         {
1761                                                 return false;
1762                                         }
1763                                 }
1764
1765                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1766                         }
1767                 }
1768
1769                 return false;
1770         }
1771
1772         bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1773         {
1774                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1775                 const SetupProcessor::State &state = draw.setupState;
1776                 const DrawData &data = *draw.data;
1777
1778                 Vertex &v = triangle.v0;
1779
1780                 float pSize;
1781
1782                 int pts = state.pointSizeRegister;
1783
1784                 if(state.pointSizeRegister != 0xF)
1785                 {
1786                         pSize = v.v[pts].y;
1787                 }
1788                 else
1789                 {
1790                         pSize = data.point.pointSize[0];
1791                 }
1792
1793                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1794
1795                 float4 P[4];
1796                 int C[4];
1797
1798                 int pos = state.positionRegister;
1799
1800                 P[0] = v.v[pos];
1801                 P[1] = v.v[pos];
1802                 P[2] = v.v[pos];
1803                 P[3] = v.v[pos];
1804
1805                 const float X = pSize * P[0].w * data.halfPixelX[0];
1806                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1807
1808                 P[0].x -= X;
1809                 P[0].y += Y;
1810                 C[0] = computeClipFlags(P[0], data);
1811
1812                 P[1].x += X;
1813                 P[1].y += Y;
1814                 C[1] = computeClipFlags(P[1], data);
1815
1816                 P[2].x += X;
1817                 P[2].y -= Y;
1818                 C[2] = computeClipFlags(P[2], data);
1819
1820                 P[3].x -= X;
1821                 P[3].y -= Y;
1822                 C[3] = computeClipFlags(P[3], data);
1823
1824                 triangle.v1 = triangle.v0;
1825                 triangle.v2 = triangle.v0;
1826
1827                 triangle.v1.X += iround(16 * 0.5f * pSize);
1828                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1829
1830                 Polygon polygon(P, 4);
1831
1832                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1833                 {
1834                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1835
1836                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1837                         {
1838                                 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1839                                 {
1840                                         return false;
1841                                 }
1842                         }
1843                         
1844                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1845                 }
1846
1847                 return false;
1848         }
1849
1850         unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1851         {
1852                 float clX = v.x + data.halfPixelX[0] * v.w;
1853                 float clY = v.y + data.halfPixelY[0] * v.w;
1854
1855                 return ((clX > v.w)  << 0) |
1856                            ((clY > v.w)  << 1) |
1857                            ((v.z > v.w)  << 2) |
1858                            ((clX < -v.w) << 3) |
1859                        ((clY < -v.w) << 4) |
1860                            ((v.z < 0)    << 5) |
1861                            Clipper::CLIP_FINITE;   // FIXME: xyz finite
1862         }
1863
1864         void Renderer::initializeThreads()
1865         {
1866                 unitCount = ceilPow2(threadCount);
1867                 clusterCount = ceilPow2(threadCount);
1868
1869                 for(int i = 0; i < unitCount; i++)
1870                 {
1871                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1872                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1873                 }
1874
1875                 for(int i = 0; i < threadCount; i++)
1876                 {
1877                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1878                         vertexTask[i]->vertexCache.drawCall = -1;
1879
1880                         task[i].type = Task::SUSPEND;
1881
1882                         resume[i] = new Event();
1883                         suspend[i] = new Event();
1884
1885                         Parameters parameters;
1886                         parameters.threadIndex = i;
1887                         parameters.renderer = this;
1888
1889                         exitThreads = false;
1890                         worker[i] = new Thread(threadFunction, &parameters);
1891
1892                         suspend[i]->wait();
1893                         suspend[i]->signal();
1894                 }
1895         }
1896
1897         void Renderer::terminateThreads()
1898         {
1899                 while(threadsAwake != 0)
1900                 {
1901                         Thread::sleep(1);
1902                 }
1903
1904                 for(int thread = 0; thread < threadCount; thread++)
1905                 {
1906                         if(worker[thread])
1907                         {
1908                                 exitThreads = true;
1909                                 resume[thread]->signal();
1910                                 worker[thread]->join();
1911                                 
1912                                 delete worker[thread];
1913                                 worker[thread] = 0;
1914                                 delete resume[thread];
1915                                 resume[thread] = 0;
1916                                 delete suspend[thread];
1917                                 suspend[thread] = 0;
1918                         }
1919                 
1920                         deallocate(vertexTask[thread]);
1921                         vertexTask[thread] = 0;
1922                 }
1923
1924                 for(int i = 0; i < 16; i++)
1925                 {
1926                         deallocate(triangleBatch[i]);
1927                         triangleBatch[i] = 0;
1928
1929                         deallocate(primitiveBatch[i]);
1930                         primitiveBatch[i] = 0;
1931                 }
1932         }
1933
1934         void Renderer::loadConstants(const VertexShader *vertexShader)
1935         {
1936                 if(!vertexShader) return;
1937
1938                 size_t count = vertexShader->getLength();
1939
1940                 for(size_t i = 0; i < count; i++)
1941                 {
1942                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
1943
1944                         if(instruction->opcode == Shader::OPCODE_DEF)
1945                         {
1946                                 int index = instruction->dst.index;
1947                                 float value[4];
1948
1949                                 value[0] = instruction->src[0].value[0];
1950                                 value[1] = instruction->src[0].value[1];
1951                                 value[2] = instruction->src[0].value[2];
1952                                 value[3] = instruction->src[0].value[3];
1953
1954                                 setVertexShaderConstantF(index, value);
1955                         }
1956                         else if(instruction->opcode == Shader::OPCODE_DEFI)
1957                         {
1958                                 int index = instruction->dst.index;
1959                                 int integer[4];
1960
1961                                 integer[0] = instruction->src[0].integer[0];
1962                                 integer[1] = instruction->src[0].integer[1];
1963                                 integer[2] = instruction->src[0].integer[2];
1964                                 integer[3] = instruction->src[0].integer[3];
1965
1966                                 setVertexShaderConstantI(index, integer);
1967                         }
1968                         else if(instruction->opcode == Shader::OPCODE_DEFB)
1969                         {
1970                                 int index = instruction->dst.index;
1971                                 int boolean = instruction->src[0].boolean[0];
1972
1973                                 setVertexShaderConstantB(index, &boolean);
1974                         }
1975                 }
1976         }
1977
1978         void Renderer::loadConstants(const PixelShader *pixelShader)
1979         {
1980                 if(!pixelShader) return;
1981
1982                 size_t count = pixelShader->getLength();
1983
1984                 for(size_t i = 0; i < count; i++)
1985                 {
1986                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
1987
1988                         if(instruction->opcode == Shader::OPCODE_DEF)
1989                         {
1990                                 int index = instruction->dst.index;
1991                                 float value[4];
1992
1993                                 value[0] = instruction->src[0].value[0];
1994                                 value[1] = instruction->src[0].value[1];
1995                                 value[2] = instruction->src[0].value[2];
1996                                 value[3] = instruction->src[0].value[3];
1997
1998                                 setPixelShaderConstantF(index, value);
1999                         }
2000                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2001                         {
2002                                 int index = instruction->dst.index;
2003                                 int integer[4];
2004
2005                                 integer[0] = instruction->src[0].integer[0];
2006                                 integer[1] = instruction->src[0].integer[1];
2007                                 integer[2] = instruction->src[0].integer[2];
2008                                 integer[3] = instruction->src[0].integer[3];
2009
2010                                 setPixelShaderConstantI(index, integer);
2011                         }
2012                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2013                         {
2014                                 int index = instruction->dst.index;
2015                                 int boolean = instruction->src[0].boolean[0];
2016
2017                                 setPixelShaderConstantB(index, &boolean);
2018                         }
2019                 }
2020         }
2021
2022         void Renderer::setIndexBuffer(Resource *indexBuffer)
2023         {
2024                 context->indexBuffer = indexBuffer;
2025         }
2026
2027         void Renderer::setMultiSampleMask(unsigned int mask)
2028         {
2029                 context->sampleMask = mask;
2030         }
2031
2032         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2033         {
2034                 sw::transparencyAntialiasing = transparencyAntialiasing;
2035         }
2036
2037         bool Renderer::isReadWriteTexture(int sampler)
2038         {
2039                 for(int index = 0; index < 4; index++)
2040                 {
2041                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2042                         {
2043                                 return true;
2044                         }
2045                 }
2046         
2047                 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
2048                 {
2049                         return true;
2050                 }
2051
2052                 return false;
2053         }
2054         
2055         void Renderer::updateClipper()
2056         {
2057                 if(updateClipPlanes)
2058                 {
2059                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2060                         {
2061                                 const Matrix &scissorWorld = getViewTransform();
2062
2063                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2064                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2065                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2066                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2067                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2068                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2069                         }
2070                         else   // User plane in clip space
2071                         {
2072                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2073                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2074                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2075                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2076                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2077                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2078                         }
2079
2080                         updateClipPlanes = false;
2081                 }
2082         }
2083
2084         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2085         {
2086                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2087
2088                 context->texture[sampler] = resource;
2089         }
2090
2091         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2092         {
2093                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2094                 
2095                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2096         }
2097
2098         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2099         {
2100                 if(type == SAMPLER_PIXEL)
2101                 {
2102                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2103                 }
2104                 else
2105                 {
2106                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2107                 }
2108         }
2109
2110         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2111         {
2112                 if(type == SAMPLER_PIXEL)
2113                 {
2114                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2115                 }
2116                 else
2117                 {
2118                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2119                 }
2120         }
2121
2122         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2123         {
2124                 if(type == SAMPLER_PIXEL)
2125                 {
2126                         PixelProcessor::setGatherEnable(sampler, enable);
2127                 }
2128                 else
2129                 {
2130                         VertexProcessor::setGatherEnable(sampler, enable);
2131                 }
2132         }
2133
2134         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2135         {
2136                 if(type == SAMPLER_PIXEL)
2137                 {
2138                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2139                 }
2140                 else
2141                 {
2142                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2143                 }
2144         }
2145
2146         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2147         {
2148                 if(type == SAMPLER_PIXEL)
2149                 {
2150                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2151                 }
2152                 else
2153                 {
2154                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2155                 }
2156         }
2157
2158         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2159         {
2160                 if(type == SAMPLER_PIXEL)
2161                 {
2162                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2163                 }
2164                 else
2165                 {
2166                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2167                 }
2168         }
2169
2170         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2171         {
2172                 if(type == SAMPLER_PIXEL)
2173                 {
2174                         PixelProcessor::setReadSRGB(sampler, sRGB);
2175                 }
2176                 else
2177                 {
2178                         VertexProcessor::setReadSRGB(sampler, sRGB);
2179                 }
2180         }
2181
2182         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2183         {
2184                 if(type == SAMPLER_PIXEL)
2185                 {
2186                         PixelProcessor::setMipmapLOD(sampler, bias);
2187                 }
2188                 else
2189                 {
2190                         VertexProcessor::setMipmapLOD(sampler, bias);
2191                 }
2192         }
2193
2194         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2195         {
2196                 if(type == SAMPLER_PIXEL)
2197                 {
2198                         PixelProcessor::setBorderColor(sampler, borderColor);
2199                 }
2200                 else
2201                 {
2202                         VertexProcessor::setBorderColor(sampler, borderColor);
2203                 }
2204         }
2205
2206         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2207         {
2208                 if(type == SAMPLER_PIXEL)
2209                 {
2210                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2211                 }
2212                 else
2213                 {
2214                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2215                 }
2216         }
2217
2218         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2219         {
2220                 context->setPointSpriteEnable(pointSpriteEnable);
2221         }
2222
2223         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2224         {
2225                 context->setPointScaleEnable(pointScaleEnable);
2226         }
2227
2228         void Renderer::setLineWidth(float width)
2229         {
2230                 context->lineWidth = width;
2231         }
2232
2233         void Renderer::setDepthBias(float bias)
2234         {
2235                 depthBias = bias;
2236         }
2237
2238         void Renderer::setSlopeDepthBias(float slopeBias)
2239         {
2240                 slopeDepthBias = slopeBias;
2241         }
2242
2243         void Renderer::setPixelShader(const PixelShader *shader)
2244         {
2245                 context->pixelShader = shader;
2246
2247                 loadConstants(shader);
2248         }
2249
2250         void Renderer::setVertexShader(const VertexShader *shader)
2251         {
2252                 context->vertexShader = shader;
2253
2254                 loadConstants(shader);
2255         }
2256
2257         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2258         {
2259                 for(int i = 0; i < DRAW_COUNT; i++)
2260                 {
2261                         if(drawCall[i]->psDirtyConstF < index + count)
2262                         {
2263                                 drawCall[i]->psDirtyConstF = index + count;
2264                         }
2265                 }
2266
2267                 for(int i = 0; i < count; i++)
2268                 {
2269                         PixelProcessor::setFloatConstant(index + i, value);
2270                         value += 4;
2271                 }
2272         }
2273
2274         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2275         {
2276                 for(int i = 0; i < DRAW_COUNT; i++)
2277                 {
2278                         if(drawCall[i]->psDirtyConstI < index + count)
2279                         {
2280                                 drawCall[i]->psDirtyConstI = index + count;
2281                         }
2282                 }
2283
2284                 for(int i = 0; i < count; i++)
2285                 {
2286                         PixelProcessor::setIntegerConstant(index + i, value);
2287                         value += 4;
2288                 }
2289         }
2290
2291         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2292         {
2293                 for(int i = 0; i < DRAW_COUNT; i++)
2294                 {
2295                         if(drawCall[i]->psDirtyConstB < index + count)
2296                         {
2297                                 drawCall[i]->psDirtyConstB = index + count;
2298                         }
2299                 }
2300
2301                 for(int i = 0; i < count; i++)
2302                 {
2303                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2304                         boolean++;
2305                 }
2306         }
2307
2308         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2309         {
2310                 for(int i = 0; i < DRAW_COUNT; i++)
2311                 {
2312                         if(drawCall[i]->vsDirtyConstF < index + count)
2313                         {
2314                                 drawCall[i]->vsDirtyConstF = index + count;
2315                         }
2316                 }
2317
2318                 for(int i = 0; i < count; i++)
2319                 {
2320                         VertexProcessor::setFloatConstant(index + i, value);
2321                         value += 4;
2322                 }
2323         }
2324
2325         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2326         {
2327                 for(int i = 0; i < DRAW_COUNT; i++)
2328                 {
2329                         if(drawCall[i]->vsDirtyConstI < index + count)
2330                         {
2331                                 drawCall[i]->vsDirtyConstI = index + count;
2332                         }
2333                 }
2334
2335                 for(int i = 0; i < count; i++)
2336                 {
2337                         VertexProcessor::setIntegerConstant(index + i, value);
2338                         value += 4;
2339                 }
2340         }
2341
2342         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2343         {
2344                 for(int i = 0; i < DRAW_COUNT; i++)
2345                 {
2346                         if(drawCall[i]->vsDirtyConstB < index + count)
2347                         {
2348                                 drawCall[i]->vsDirtyConstB = index + count;
2349                         }
2350                 }
2351
2352                 for(int i = 0; i < count; i++)
2353                 {
2354                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2355                         boolean++;
2356                 }
2357         }
2358
2359         void Renderer::setModelMatrix(const Matrix &M, int i)
2360         {
2361                 VertexProcessor::setModelMatrix(M, i);
2362         }
2363
2364         void Renderer::setViewMatrix(const Matrix &V)
2365         {
2366                 VertexProcessor::setViewMatrix(V);
2367                 updateClipPlanes = true;
2368         }
2369
2370         void Renderer::setBaseMatrix(const Matrix &B)
2371         {
2372                 VertexProcessor::setBaseMatrix(B);
2373                 updateClipPlanes = true;
2374         }
2375
2376         void Renderer::setProjectionMatrix(const Matrix &P)
2377         {
2378                 VertexProcessor::setProjectionMatrix(P);
2379                 updateClipPlanes = true;
2380         }
2381
2382         void Renderer::addQuery(Query *query)
2383         {
2384                 queries.push_back(query);
2385         }
2386         
2387         void Renderer::removeQuery(Query *query)
2388         {
2389                 queries.remove(query);
2390         }
2391
2392         #if PERF_HUD
2393                 int Renderer::getThreadCount()
2394                 {
2395                         return threadCount;
2396                 }
2397                 
2398                 int64_t Renderer::getVertexTime(int thread)
2399                 {
2400                         return vertexTime[thread];
2401                 }
2402
2403                 int64_t Renderer::getSetupTime(int thread)
2404                 {
2405                         return setupTime[thread];
2406                 }
2407                         
2408                 int64_t Renderer::getPixelTime(int thread)
2409                 {
2410                         return pixelTime[thread];
2411                 }
2412
2413                 void Renderer::resetTimers()
2414                 {
2415                         for(int thread = 0; thread < threadCount; thread++)
2416                         {
2417                                 vertexTime[thread] = 0;
2418                                 setupTime[thread] = 0;
2419                                 pixelTime[thread] = 0;
2420                         }
2421                 }
2422         #endif
2423
2424         void Renderer::setViewport(const Viewport &viewport)
2425         {
2426                 this->viewport = viewport;
2427         }
2428
2429         void Renderer::setScissor(const Rect &scissor)
2430         {
2431                 this->scissor = scissor;
2432         }
2433
2434         void Renderer::setClipFlags(int flags)
2435         {
2436                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2437         }
2438
2439         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2440         {
2441                 if(index < 6)
2442                 {
2443                         userPlane[index] = plane;
2444                 }
2445                 else ASSERT(false);
2446
2447                 updateClipPlanes = true;
2448         }
2449
2450         void Renderer::updateConfiguration(bool initialUpdate)
2451         {
2452                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2453
2454                 if(newConfiguration || initialUpdate)
2455                 {
2456                         terminateThreads();
2457
2458                         SwiftConfig::Configuration configuration = {0};
2459                         swiftConfig->getConfiguration(configuration);
2460
2461                         precacheVertex = !newConfiguration && configuration.precache;
2462                         precacheSetup = !newConfiguration && configuration.precache;
2463                         precachePixel = !newConfiguration && configuration.precache;
2464
2465                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2466                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2467                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2468
2469                         switch(configuration.textureSampleQuality)
2470                         {
2471                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2472                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2473                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2474                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2475                         }
2476
2477                         switch(configuration.mipmapQuality)
2478                         {
2479                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2480                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2481                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2482                         }
2483
2484                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2485
2486                         switch(configuration.transcendentalPrecision)
2487                         {
2488                         case 0:
2489                                 logPrecision = APPROXIMATE;
2490                                 expPrecision = APPROXIMATE;
2491                                 rcpPrecision = APPROXIMATE;
2492                                 rsqPrecision = APPROXIMATE;
2493                                 break;
2494                         case 1:
2495                                 logPrecision = PARTIAL;
2496                                 expPrecision = PARTIAL;
2497                                 rcpPrecision = PARTIAL;
2498                                 rsqPrecision = PARTIAL;
2499                                 break;
2500                         case 2:
2501                                 logPrecision = ACCURATE;
2502                                 expPrecision = ACCURATE;
2503                                 rcpPrecision = ACCURATE;
2504                                 rsqPrecision = ACCURATE;
2505                                 break;
2506                         case 3:
2507                                 logPrecision = WHQL;
2508                                 expPrecision = WHQL;
2509                                 rcpPrecision = WHQL;
2510                                 rsqPrecision = WHQL;
2511                                 break;
2512                         case 4:
2513                                 logPrecision = IEEE;
2514                                 expPrecision = IEEE;
2515                                 rcpPrecision = IEEE;
2516                                 rsqPrecision = IEEE;
2517                                 break;
2518                         default:
2519                                 logPrecision = ACCURATE;
2520                                 expPrecision = ACCURATE;
2521                                 rcpPrecision = ACCURATE;
2522                                 rsqPrecision = ACCURATE;
2523                                 break;
2524                         }
2525
2526                         switch(configuration.transparencyAntialiasing)
2527                         {
2528                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2529                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2530                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2531                         }
2532
2533                         switch(configuration.threadCount)
2534                         {
2535                         case -1: threadCount = CPUID::coreCount();        break;
2536                         case 0:  threadCount = CPUID::processAffinity();  break;
2537                         default: threadCount = configuration.threadCount; break;
2538                         }
2539
2540                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2541                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2542                         CPUID::setEnableSSE3(configuration.enableSSE3);
2543                         CPUID::setEnableSSE2(configuration.enableSSE2);
2544                         CPUID::setEnableSSE(configuration.enableSSE);
2545
2546                         for(int pass = 0; pass < 10; pass++)
2547                         {
2548                                 optimization[pass] = configuration.optimization[pass];
2549                         }
2550
2551                         forceWindowed = configuration.forceWindowed;
2552                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2553                         postBlendSRGB = configuration.postBlendSRGB;
2554                         exactColorRounding = configuration.exactColorRounding;
2555                         forceClearRegisters = configuration.forceClearRegisters;
2556
2557                 #ifndef NDEBUG
2558                         minPrimitives = configuration.minPrimitives;
2559                         maxPrimitives = configuration.maxPrimitives;
2560                 #endif
2561                 }
2562
2563                 if(!initialUpdate && !worker[0])
2564                 {
2565                         initializeThreads();
2566                 }
2567         }
2568 }