OSDN Git Service

glInstanceID implementation
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2012 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "Renderer.hpp"
13
14 #include "Clipper.hpp"
15 #include "Math.hpp"
16 #include "FrameBuffer.hpp"
17 #include "Timer.hpp"
18 #include "Surface.hpp"
19 #include "Half.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
24 #include "CPUID.hpp"
25 #include "Memory.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
28 #include "Debug.hpp"
29 #include "Reactor/Reactor.hpp"
30
31 #include <malloc.h>
32
33 #undef max
34
// Passed to the SwiftConfig constructor when a Renderer is created.
35 bool disableServer = true;
36
// Debug builds only: Renderer::draw() returns immediately for draws whose primitive
// count is below minPrimitives or above maxPrimitives.
37 #ifndef NDEBUG
38 unsigned int minPrimitives = 1;
39 unsigned int maxPrimitives = 1 << 21;
40 #endif
41
42 namespace sw
43 {
// Flags defined in other translation units. The Renderer constructor assigns
// halfIntegerCoordinates, symmetricNormalizedDepth, booleanFaceRegister,
// fullPixelPositionRegister, leadingVertexFirst and exactColorRounding from its arguments.
44         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
45         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
46         extern bool booleanFaceRegister;
47         extern bool fullPixelPositionRegister;
48
49         extern bool forceWindowed;
50         extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near
51         extern bool postBlendSRGB;
52         extern bool exactColorRounding;
53         extern bool leadingVertexFirst;
54         extern TransparencyAntialiasing transparencyAntialiasing;
55         extern bool forceClearRegisters;
56
57         extern bool precacheVertex;
58         extern bool precacheSetup;
59         extern bool precachePixel;
60
// Tunables defined here.
61         int batchSize = 128;   // Renderer::draw() divides this by the multisample count to get primitives per batch
62         int threadCount = 1;
63         int unitCount = 1;      // Upper bound of the primitive unit loops in findAvailableTasks()
64         int clusterCount = 1;   // Upper bound of the pixel cluster loops in draw() and findAvailableTasks()
65
// Renderer::threadFunction() enables flush-to-zero and denormals-are-zero when logPrecision < IEEE.
66         TranscendentalPrecision logPrecision = ACCURATE;
67         TranscendentalPrecision expPrecision = ACCURATE;
68         TranscendentalPrecision rcpPrecision = ACCURATE;
69         TranscendentalPrecision rsqPrecision = ACCURATE;
70         bool perspectiveCorrection = true;
71
// Argument bundle that Renderer::threadFunction() reads through its void* parameter.
72         struct Parameters
73         {
74                 Renderer *renderer;   // Renderer whose threadLoop() is run
75                 int threadIndex;      // Index handed to threadLoop()
76         };
77
78         DrawCall::DrawCall()
79         {
80                 queries = 0;
81
82                 vsDirtyConstF = 256 + 1;
83                 vsDirtyConstI = 16;
84                 vsDirtyConstB = 16;
85
86                 psDirtyConstF = 224;
87                 psDirtyConstI = 16;
88                 psDirtyConstB = 16;
89
90                 references = -1;
91
92                 data = (DrawData*)allocate(sizeof(DrawData));
93                 data->constants = &constants;
94         }
95
96         DrawCall::~DrawCall()
97         {
98                 delete queries;
99
100                 deallocate(data);
101         }
102
103         Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : context(context), VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), viewport()
104         {
105                 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
106                 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
107                 sw::booleanFaceRegister = conventions.booleanFaceRegister;
108                 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
109                 sw::leadingVertexFirst = conventions.leadingVertexFirst;
110                 sw::exactColorRounding = exactColorRounding;
111
112                 setRenderTarget(0, 0);
113                 clipper = new Clipper();
114
115                 updateViewMatrix = true;
116                 updateBaseMatrix = true;
117                 updateProjectionMatrix = true;
118                 updateClipPlanes = true;
119
120                 #if PERF_HUD
121                         resetTimers();
122                 #endif
123
124                 for(int i = 0; i < 16; i++)
125                 {
126                         vertexTask[i] = 0;
127
128                         worker[i] = 0;
129                         resume[i] = 0;
130                         suspend[i] = 0;
131                 }
132
133                 threadsAwake = 0;
134                 resumeApp = new Event();
135
136                 currentDraw = 0;
137                 nextDraw = 0;
138
139                 qHead = 0;
140                 qSize = 0;
141
142                 for(int i = 0; i < 16; i++)
143                 {
144                         triangleBatch[i] = 0;
145                         primitiveBatch[i] = 0;
146                 }
147
148                 for(int draw = 0; draw < DRAW_COUNT; draw++)
149                 {
150                         drawCall[draw] = new DrawCall();
151                         drawList[draw] = drawCall[draw];
152                 }
153
154                 for(int unit = 0; unit < 16; unit++)
155                 {
156                         primitiveProgress[unit].init();
157                 }
158
159                 for(int cluster = 0; cluster < 16; cluster++)
160                 {
161                         pixelProgress[cluster].init();
162                 }
163
164                 clipFlags = 0;
165
166                 swiftConfig = new SwiftConfig(disableServer);
167                 updateConfiguration(true);
168
169                 sync = new Resource(0);
170         }
171
172         Renderer::~Renderer()
173         {
174                 sync->destruct();
175
176                 delete clipper;
177                 clipper = 0;
178
179                 terminateThreads();
180                 delete resumeApp;
181
182                 for(int draw = 0; draw < DRAW_COUNT; draw++)
183                 {
184                         delete drawCall[draw];
185                 }
186
187                 delete swiftConfig;
188         }
189
190         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
191         {
192                 blitter.blit(source, sRect, dest, dRect, filter);
193         }
194
195         void Renderer::blit3D(Surface *source, Surface *dest)
196         {
197                 blitter.blit3D(source, dest);
198         }
199
200         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
201         {
202                 #ifndef NDEBUG
203                         if(count < minPrimitives || count > maxPrimitives)
204                         {
205                                 return;
206                         }
207                 #endif
208
209                 context->drawType = drawType;
210
211                 updateConfiguration();
212                 updateClipper();
213
214                 int ss = context->getSuperSampleCount();
215                 int ms = context->getMultiSampleCount();
216
217                 for(int q = 0; q < ss; q++)
218                 {
219                         int oldMultiSampleMask = context->multiSampleMask;
220                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
221
222                         if(!context->multiSampleMask)
223                         {
224                                 continue;
225                         }
226
227                         sync->lock(sw::PRIVATE);
228
229                         Routine *vertexRoutine;
230                         Routine *setupRoutine;
231                         Routine *pixelRoutine;
232
233                         if(update || oldMultiSampleMask != context->multiSampleMask)
234                         {
235                                 vertexState = VertexProcessor::update();
236                                 setupState = SetupProcessor::update();
237                                 pixelState = PixelProcessor::update();
238
239                                 vertexRoutine = VertexProcessor::routine(vertexState);
240                                 setupRoutine = SetupProcessor::routine(setupState);
241                                 pixelRoutine = PixelProcessor::routine(pixelState);
242                         }
243
244                         int batch = batchSize / ms;
245
246                         int (*setupPrimitives)(Renderer *renderer, int batch, int count);
247
248                         if(context->isDrawTriangle())
249                         {
250                                 switch(context->fillMode)
251                                 {
252                                 case FILL_SOLID:
253                                         setupPrimitives = setupSolidTriangles;
254                                         break;
255                                 case FILL_WIREFRAME:
256                                         setupPrimitives = setupWireframeTriangle;
257                                         batch = 1;
258                                         break;
259                                 case FILL_VERTEX:
260                                         setupPrimitives = setupVertexTriangle;
261                                         batch = 1;
262                                         break;
263                                 default: ASSERT(false);
264                                 }
265                         }
266                         else if(context->isDrawLine())
267                         {
268                                 setupPrimitives = setupLines;
269                         }
270                         else   // Point draw
271                         {
272                                 setupPrimitives = setupPoints;
273                         }
274
275                         DrawCall *draw = 0;
276
277                         do
278                         {
279                                 for(int i = 0; i < DRAW_COUNT; i++)
280                                 {
281                                         if(drawCall[i]->references == -1)
282                                         {
283                                                 draw = drawCall[i];
284                                                 drawList[nextDraw % DRAW_COUNT] = draw;
285
286                                                 break;
287                                         }
288                                 }
289
290                                 if(!draw)
291                                 {
292                                         resumeApp->wait();
293                                 }
294                         }
295                         while(!draw);
296
297                         DrawData *data = draw->data;
298
299                         if(queries.size() != 0)
300                         {
301                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
302                                 {
303                                         atomicIncrement(&(*query)->reference);
304                                 }
305
306                                 draw->queries = new std::list<Query*>(queries);
307                         }
308
309                         draw->drawType = drawType;
310                         draw->batchSize = batch;
311
312                         vertexRoutine->bind();
313                         setupRoutine->bind();
314                         pixelRoutine->bind();
315
316                         draw->vertexRoutine = vertexRoutine;
317                         draw->setupRoutine = setupRoutine;
318                         draw->pixelRoutine = pixelRoutine;
319                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
320                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
321                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
322                         draw->setupPrimitives = setupPrimitives;
323                         draw->setupState = setupState;
324
325                         for(int i = 0; i < 16; i++)
326                         {
327                                 draw->vertexStream[i] = context->input[i].resource;
328                                 data->input[i] = context->input[i].buffer;
329                                 data->stride[i] = context->input[i].stride;
330
331                                 if(draw->vertexStream[i])
332                                 {
333                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
334                                 }
335                         }
336
337                         if(context->indexBuffer)
338                         {
339                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
340                         }
341
342                         draw->indexBuffer = context->indexBuffer;
343
344                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
345                         {
346                                 draw->texture[sampler] = 0;
347                         }
348
349                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
350                         {
351                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
352                                 {
353                                         draw->texture[sampler] = context->texture[sampler];
354                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
355
356                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
357                                 }
358                         }
359
360                         if(context->pixelShader)
361                         {
362                                 if(draw->psDirtyConstF)
363                                 {
364                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
365                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
366                                         draw->psDirtyConstF = 0;
367                                 }
368
369                                 if(draw->psDirtyConstI)
370                                 {
371                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
372                                         draw->psDirtyConstI = 0;
373                                 }
374
375                                 if(draw->psDirtyConstB)
376                                 {
377                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
378                                         draw->psDirtyConstB = 0;
379                                 }
380                         }
381                         
382                         if(context->pixelShaderVersion() <= 0x0104)
383                         {
384                                 for(int stage = 0; stage < 8; stage++)
385                                 {
386                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
387                                         {
388                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
389                                         }
390                                         else break;
391                                 }
392                         }
393
394                         if(context->vertexShader)
395                         {
396                                 if(context->vertexShader->getVersion() >= 0x0300)
397                                 {
398                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
399                                         {
400                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
401                                                 {
402                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
403                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
404
405                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
406                                                 }
407                                         }
408                                 }
409
410                                 if(draw->vsDirtyConstF)
411                                 {
412                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
413                                         draw->vsDirtyConstF = 0;
414                                 }
415
416                                 if(draw->vsDirtyConstI)
417                                 {
418                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
419                                         draw->vsDirtyConstI = 0;
420                                 }
421
422                                 if(draw->vsDirtyConstB)
423                                 {
424                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
425                                         draw->vsDirtyConstB = 0;
426                                 }
427
428                                 if(context->vertexShader->instanceIdDeclared)
429                                 {
430                                         data->instanceID = context->instanceID;
431                                 }
432                         }
433                         else
434                         {
435                                 data->ff = ff;
436
437                                 draw->vsDirtyConstF = 256 + 1;
438                                 draw->vsDirtyConstI = 16;
439                                 draw->vsDirtyConstB = 16;
440                         }
441
442                         if(pixelState.stencilActive)
443                         {
444                                 data->stencil[0] = stencil;
445                                 data->stencil[1] = stencilCCW;
446                         }
447
448                         if(pixelState.fogActive)
449                         {
450                                 data->fog = fog;
451                         }
452
453                         if(setupState.isDrawPoint)
454                         {
455                                 data->point = point;
456                         }
457
458                         data->lineWidth = context->lineWidth;
459
460                         data->factor = factor;
461
462                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
463                         {
464                                 float ref = (float)context->alphaReference * (1.0f / 255.0f);
465                                 float margin = sw::min(ref, 1.0f - ref);
466
467                                 if(ms == 4)
468                                 {
469                                         data->a2c0 = replicate(ref - margin * 0.6f);
470                                         data->a2c1 = replicate(ref - margin * 0.2f);
471                                         data->a2c2 = replicate(ref + margin * 0.2f);
472                                         data->a2c3 = replicate(ref + margin * 0.6f);
473                                 }
474                                 else if(ms == 2)
475                                 {
476                                         data->a2c0 = replicate(ref - margin * 0.3f);
477                                         data->a2c1 = replicate(ref + margin * 0.3f);
478                                 }
479                                 else ASSERT(false);
480                         }
481
482                         if(pixelState.occlusionEnabled)
483                         {
484                                 for(int cluster = 0; cluster < clusterCount; cluster++)
485                                 {
486                                         data->occlusion[cluster] = 0;
487                                 }
488                         }
489
490                         #if PERF_PROFILE
491                                 for(int cluster = 0; cluster < clusterCount; cluster++)
492                                 {
493                                         for(int i = 0; i < PERF_TIMERS; i++)
494                                         {
495                                                 data->cycles[i][cluster] = 0;
496                                         }
497                                 }
498                         #endif
499
500                         // Viewport
501                         {
502                                 float W = 0.5f * viewport.width;
503                                 float H = 0.5f * viewport.height;
504                                 float X0 = viewport.x0 + W;
505                                 float Y0 = viewport.y0 + H;
506                                 float N = viewport.minZ;
507                                 float F = viewport.maxZ;
508                                 float Z = F - N;
509
510                                 if(context->isDrawTriangle(false))
511                                 {
512                                         N += depthBias;
513                                 }
514
515                                 if(complementaryDepthBuffer)
516                                 {
517                                         Z = -Z;
518                                         N = 1 - N;
519                                 }
520
521                                 static const float X[5][16] =   // Fragment offsets
522                                 {
523                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
524                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
525                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
526                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
527                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
528                                 };
529
530                                 static const float Y[5][16] =   // Fragment offsets
531                                 {
532                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
533                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
534                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
535                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
536                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
537                                 };
538
539                                 int s = sw::log2(ss);
540
541                                 data->Wx16 = replicate(W * 16);
542                                 data->Hx16 = replicate(H * 16);
543                                 data->X0x16 = replicate(X0 * 16);
544                                 data->Y0x16 = replicate(Y0 * 16);
545                                 data->XXXX = replicate(X[s][q] / W);
546                                 data->YYYY = replicate(Y[s][q] / H);
547                                 data->halfPixelX = replicate(0.5f / W);
548                                 data->halfPixelY = replicate(0.5f / H);
549                                 data->viewportHeight = abs(viewport.height);
550                                 data->slopeDepthBias = slopeDepthBias;
551                                 data->depthRange = Z;
552                                 data->depthNear = N;
553                                 draw->clipFlags = clipFlags;
554
555                                 if(clipFlags)
556                                 {
557                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
558                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
559                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
560                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
561                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
562                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
563                                 }
564                         }
565
566                         // Target
567                         {
568                                 for(int index = 0; index < 4; index++)
569                                 {
570                                         draw->renderTarget[index] = context->renderTarget[index];
571
572                                         if(draw->renderTarget[index])
573                                         {
574                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
575                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
576                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
577                                         }
578                                 }
579
580                                 draw->depthStencil = context->depthStencil;
581
582                                 if(draw->depthStencil)
583                                 {
584                                         data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
585                                         data->depthPitchB = context->depthStencil->getInternalPitchB();
586                                         data->depthSliceB = context->depthStencil->getInternalSliceB();
587
588                                         data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
589                                         data->stencilPitchB = context->depthStencil->getStencilPitchB();
590                                         data->stencilSliceB = context->depthStencil->getStencilSliceB();
591                                 }
592                         }
593
594                         // Scissor
595                         {
596                                 data->scissorX0 = scissor.x0;
597                                 data->scissorX1 = scissor.x1;
598                                 data->scissorY0 = scissor.y0;
599                                 data->scissorY1 = scissor.y1;
600                         }
601
602                         draw->primitive = 0;
603                         draw->count = count;
604
605                         draw->references = (count + batch - 1) / batch;
606
607                         schedulerMutex.lock();
608                         nextDraw++;
609                         schedulerMutex.unlock();
610
611                         if(!threadsAwake)
612                         {
613                                 suspend[0]->wait();
614
615                                 threadsAwake = 1;
616                                 task[0].type = Task::RESUME;
617
618                                 resume[0]->signal();
619                         }
620                 }
621         }
622
623         void Renderer::threadFunction(void *parameters)
624         {
625                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
626                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
627
628                 if(logPrecision < IEEE)
629                 {
630                         CPUID::setFlushToZero(true);
631                         CPUID::setDenormalsAreZero(true);
632                 }
633
634                 renderer->threadLoop(threadIndex);
635         }
636
637         void Renderer::threadLoop(int threadIndex)
638         {
639                 while(!exitThreads)
640                 {
641                         taskLoop(threadIndex);
642
643                         suspend[threadIndex]->signal();
644                         resume[threadIndex]->wait();
645                 }
646         }
647
648         void Renderer::taskLoop(int threadIndex)
649         {
650                 while(task[threadIndex].type != Task::SUSPEND)
651                 {
652                         scheduleTask(threadIndex);
653                         executeTask(threadIndex);
654                 }
655         }
656
657         void Renderer::findAvailableTasks()
658         {
659                 // Find pixel tasks
660                 for(int cluster = 0; cluster < clusterCount; cluster++)
661                 {
662                         if(!pixelProgress[cluster].executing)
663                         {
664                                 for(int unit = 0; unit < unitCount; unit++)
665                                 {
666                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
667                                         {
668                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
669                                                 {
670                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
671                                                         {
672                                                                 Task &task = taskQueue[qHead];
673                                                                 task.type = Task::PIXELS;
674                                                                 task.primitiveUnit = unit;
675                                                                 task.pixelCluster = cluster;
676
677                                                                 pixelProgress[cluster].executing = true;
678
679                                                                 // Commit to the task queue
680                                                                 qHead = (qHead + 1) % 32;
681                                                                 qSize++;
682
683                                                                 break;
684                                                         }
685                                                 }
686                                         }
687                                 }
688                         }
689                 }
690         
691                 // Find primitive tasks
692                 if(currentDraw == nextDraw)
693                 {
694                         return;   // No more primitives to process
695                 }
696
697                 for(int unit = 0; unit < unitCount; unit++)
698                 {
699                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
700
701                         if(draw->primitive >= draw->count)
702                         {
703                                 currentDraw++;
704
705                                 if(currentDraw == nextDraw)
706                                 {
707                                         return;   // No more primitives to process
708                                 }
709
710                                 draw = drawList[currentDraw % DRAW_COUNT];
711                         }
712
713                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
714                         {
715                                 int primitive = draw->primitive;
716                                 int count = draw->count;
717                                 int batch = draw->batchSize;
718
719                                 primitiveProgress[unit].drawCall = currentDraw;
720                                 primitiveProgress[unit].firstPrimitive = primitive;
721                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
722
723                                 draw->primitive += batch;
724
725                                 Task &task = taskQueue[qHead];
726                                 task.type = Task::PRIMITIVES;
727                                 task.primitiveUnit = unit;
728
729                                 primitiveProgress[unit].references = -1;
730
731                                 // Commit to the task queue
732                                 qHead = (qHead + 1) % 32;
733                                 qSize++;
734                         }
735                 }
736         }
737
738         void Renderer::scheduleTask(int threadIndex)
739         {
740                 schedulerMutex.lock();
741
742                 if((int)qSize < threadCount - threadsAwake + 1)
743                 {
744                         findAvailableTasks();
745                 }
746
747                 if(qSize != 0)
748                 {
749                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
750                         qSize--;
751
752                         if(threadsAwake != threadCount)
753                         {
754                                 int wakeup = qSize - threadsAwake + 1;
755
756                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
757                                 {
758                                         if(task[i].type == Task::SUSPEND)
759                                         {
760                                                 suspend[i]->wait();
761                                                 task[i].type = Task::RESUME;
762                                                 resume[i]->signal();
763
764                                                 threadsAwake++;
765                                                 wakeup--;
766                                         }
767                                 }
768                         }
769                 }
770                 else
771                 {
772                         task[threadIndex].type = Task::SUSPEND;
773
774                         threadsAwake--;
775                 }
776
777                 schedulerMutex.unlock();
778         }
779
780         void Renderer::executeTask(int threadIndex)
781         {
782                 #if PERF_HUD
783                         int64_t startTick = Timer::ticks();
784                 #endif
785
786                 switch(task[threadIndex].type)
787                 {
788                 case Task::PRIMITIVES:
789                         {
790                                 int unit = task[threadIndex].primitiveUnit;
791                                 
792                                 int input = primitiveProgress[unit].firstPrimitive;
793                                 int count = primitiveProgress[unit].primitiveCount;
794                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
795                                 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
796
797                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
798
799                                 #if PERF_HUD
800                                         int64_t time = Timer::ticks();
801                                         vertexTime[threadIndex] += time - startTick;
802                                         startTick = time;
803                                 #endif
804
805                                 int visible = setupPrimitives(this, unit, count);
806
807                                 primitiveProgress[unit].visible = visible;
808                                 primitiveProgress[unit].references = clusterCount;
809
810                                 #if PERF_HUD
811                                         setupTime[threadIndex] += Timer::ticks() - startTick;
812                                 #endif
813                         }
814                         break;
815                 case Task::PIXELS:
816                         {
817                                 int unit = task[threadIndex].primitiveUnit;
818                                 int visible = primitiveProgress[unit].visible;
819
820                                 if(visible > 0)
821                                 {
822                                         int cluster = task[threadIndex].pixelCluster;
823                                         Primitive *primitive = primitiveBatch[unit];
824                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
825                                         DrawData *data = draw->data;
826                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
827
828                                         pixelRoutine(primitive, visible, cluster, data);
829                                 }
830
831                                 finishRendering(task[threadIndex]);
832
833                                 #if PERF_HUD
834                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
835                                 #endif
836                         }
837                         break;
838                 case Task::RESUME:
839                         break;
840                 case Task::SUSPEND:
841                         break;
842                 default:
843                         ASSERT(false);
844                 }
845         }
846
847         void Renderer::synchronize()
848         {
849                 sync->lock(sw::PUBLIC);
850                 sync->unlock();
851         }
852
853         void Renderer::finishRendering(Task &pixelTask)
854         {
855                 int unit = pixelTask.primitiveUnit;
856                 int cluster = pixelTask.pixelCluster;
857
858                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
859                 DrawData &data = *draw.data;
860                 int primitive = primitiveProgress[unit].firstPrimitive;
861                 int count = primitiveProgress[unit].primitiveCount;
862
863                 pixelProgress[cluster].processedPrimitives = primitive + count;
864
865                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
866                 {
867                         pixelProgress[cluster].drawCall++;
868                         pixelProgress[cluster].processedPrimitives = 0;
869                 }
870
871                 int ref = atomicDecrement(&primitiveProgress[unit].references);
872
873                 if(ref == 0)
874                 {
875                         ref = atomicDecrement(&draw.references);
876
877                         if(ref == 0)
878                         {
879                                 #if PERF_PROFILE
880                                         for(int cluster = 0; cluster < clusterCount; cluster++)
881                                         {
882                                                 for(int i = 0; i < PERF_TIMERS; i++)
883                                                 {
884                                                         profiler.cycles[i] += data.cycles[i][cluster];
885                                                 }
886                                         }
887                                 #endif
888
889                                 if(draw.queries)
890                                 {
891                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
892                                         {
893                                                 Query *query = *q;
894
895                                                 for(int cluster = 0; cluster < clusterCount; cluster++)
896                                                 {
897                                                         atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
898                                                 }
899
900                                                 atomicDecrement(&query->reference);
901                                         }
902
903                                         delete draw.queries;
904                                         draw.queries = 0;
905                                 }
906
907                                 for(int i = 0; i < 4; i++)
908                                 {
909                                         if(draw.renderTarget[i])
910                                         {
911                                                 draw.renderTarget[i]->unlockInternal();
912                                         }
913                                 }
914
915                                 if(draw.depthStencil)
916                                 {
917                                         draw.depthStencil->unlockInternal();
918                                         draw.depthStencil->unlockStencil();
919                                 }
920
921                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
922                                 {
923                                         if(draw.texture[i])
924                                         {
925                                                 draw.texture[i]->unlock();
926                                         }
927                                 }
928
929                                 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
930                                 {
931                                         if(draw.vertexStream[i])
932                                         {
933                                                 draw.vertexStream[i]->unlock();
934                                         }
935                                 }
936
937                                 if(draw.indexBuffer)
938                                 {
939                                         draw.indexBuffer->unlock();
940                                 }
941
942                                 draw.vertexRoutine->unbind();
943                                 draw.setupRoutine->unbind();
944                                 draw.pixelRoutine->unbind();
945
946                                 sync->unlock();
947
948                                 draw.references = -1;
949                                 resumeApp->signal();
950                         }
951                 }
952
953                 pixelProgress[cluster].executing = false;
954         }
955
956         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
957         {
958                 Triangle *triangle = triangleBatch[unit];
959                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
960                 DrawData *data = draw->data;
961                 VertexTask *task = vertexTask[thread];
962
963                 const void *indices = data->indices;
964                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
965
966                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
967                 {
968                         task->vertexCache.clear();
969                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
970                 }
971
972                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
973
974                 switch(draw->drawType)
975                 {
976                 case DRAW_POINTLIST:
977                         {
978                                 unsigned int index = start;
979
980                                 for(unsigned int i = 0; i < triangleCount; i++)
981                                 {
982                                         batch[i][0] = index;
983                                         batch[i][1] = index;
984                                         batch[i][2] = index;
985
986                                         index += 1;
987                                 }
988                         }
989                         break;
990                 case DRAW_LINELIST:
991                         {
992                                 unsigned int index = 2 * start;
993
994                                 for(unsigned int i = 0; i < triangleCount; i++)
995                                 {
996                                         batch[i][0] = index + 0;
997                                         batch[i][1] = index + 1;
998                                         batch[i][2] = index + 1;
999
1000                                         index += 2;
1001                                 }
1002                         }
1003                         break;
1004                 case DRAW_LINESTRIP:
1005                         {
1006                                 unsigned int index = start;
1007
1008                                 for(unsigned int i = 0; i < triangleCount; i++)
1009                                 {
1010                                         batch[i][0] = index + 0;
1011                                         batch[i][1] = index + 1;
1012                                         batch[i][2] = index + 1;
1013
1014                                         index += 1;
1015                                 }
1016                         }
1017                         break;
1018                 case DRAW_LINELOOP:
1019                         {
1020                                 unsigned int index = start;
1021
1022                                 for(unsigned int i = 0; i < triangleCount; i++)
1023                                 {
1024                                         batch[i][0] = (index + 0) % loop;
1025                                         batch[i][1] = (index + 1) % loop;
1026                                         batch[i][2] = (index + 1) % loop;
1027
1028                                         index += 1;
1029                                 }
1030                         }
1031                         break;
1032                 case DRAW_TRIANGLELIST:
1033                         {
1034                                 unsigned int index = 3 * start;
1035
1036                                 for(unsigned int i = 0; i < triangleCount; i++)
1037                                 {
1038                                         batch[i][0] = index + 0;
1039                                         batch[i][1] = index + 1;
1040                                         batch[i][2] = index + 2;
1041
1042                                         index += 3;
1043                                 }
1044                         }
1045                         break;
1046                 case DRAW_TRIANGLESTRIP:
1047                         {
1048                                 unsigned int index = start;
1049
1050                                 for(unsigned int i = 0; i < triangleCount; i++)
1051                                 {
1052                                         batch[i][0] = index + 0;
1053                                         batch[i][1] = index + (index & 1) + 1;
1054                                         batch[i][2] = index + (~index & 1) + 1;
1055
1056                                         index += 1;
1057                                 }
1058                         }
1059                         break;
1060                 case DRAW_TRIANGLEFAN:
1061                         {
1062                                 unsigned int index = start;
1063
1064                                 for(unsigned int i = 0; i < triangleCount; i++)
1065                                 {
1066                                         batch[i][0] = index + 1;
1067                                         batch[i][1] = index + 2;
1068                                         batch[i][2] = 0;
1069
1070                                         index += 1;
1071                                 }
1072                         }
1073                         break;
1074                 case DRAW_INDEXEDPOINTLIST8:
1075                         {
1076                                 const unsigned char *index = (const unsigned char*)indices + start;
1077
1078                                 for(unsigned int i = 0; i < triangleCount; i++)
1079                                 {
1080                                         batch[i][0] = *index;
1081                                         batch[i][1] = *index;
1082                                         batch[i][2] = *index;
1083
1084                                         index += 1;
1085                                 }
1086                         }
1087                         break;
1088                 case DRAW_INDEXEDPOINTLIST16:
1089                         {
1090                                 const unsigned short *index = (const unsigned short*)indices + start;
1091
1092                                 for(unsigned int i = 0; i < triangleCount; i++)
1093                                 {
1094                                         batch[i][0] = *index;
1095                                         batch[i][1] = *index;
1096                                         batch[i][2] = *index;
1097
1098                                         index += 1;
1099                                 }
1100                         }
1101                         break;
1102                 case DRAW_INDEXEDPOINTLIST32:
1103                         {
1104                                 const unsigned int *index = (const unsigned int*)indices + start;
1105
1106                                 for(unsigned int i = 0; i < triangleCount; i++)
1107                                 {
1108                                         batch[i][0] = *index;
1109                                         batch[i][1] = *index;
1110                                         batch[i][2] = *index;
1111
1112                                         index += 1;
1113                                 }
1114                         }
1115                         break;
1116                 case DRAW_INDEXEDLINELIST8:
1117                         {
1118                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1119
1120                                 for(unsigned int i = 0; i < triangleCount; i++)
1121                                 {
1122                                         batch[i][0] = index[0];
1123                                         batch[i][1] = index[1];
1124                                         batch[i][2] = index[1];
1125
1126                                         index += 2;
1127                                 }
1128                         }
1129                         break;
1130                 case DRAW_INDEXEDLINELIST16:
1131                         {
1132                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1133
1134                                 for(unsigned int i = 0; i < triangleCount; i++)
1135                                 {
1136                                         batch[i][0] = index[0];
1137                                         batch[i][1] = index[1];
1138                                         batch[i][2] = index[1];
1139
1140                                         index += 2;
1141                                 }
1142                         }
1143                         break;
1144                 case DRAW_INDEXEDLINELIST32:
1145                         {
1146                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1147
1148                                 for(unsigned int i = 0; i < triangleCount; i++)
1149                                 {
1150                                         batch[i][0] = index[0];
1151                                         batch[i][1] = index[1];
1152                                         batch[i][2] = index[1];
1153
1154                                         index += 2;
1155                                 }
1156                         }
1157                         break;
1158                 case DRAW_INDEXEDLINESTRIP8:
1159                         {
1160                                 const unsigned char *index = (const unsigned char*)indices + start;
1161
1162                                 for(unsigned int i = 0; i < triangleCount; i++)
1163                                 {
1164                                         batch[i][0] = index[0];
1165                                         batch[i][1] = index[1];
1166                                         batch[i][2] = index[1];
1167
1168                                         index += 1;
1169                                 }
1170                         }
1171                         break;
1172                 case DRAW_INDEXEDLINESTRIP16:
1173                         {
1174                                 const unsigned short *index = (const unsigned short*)indices + start;
1175
1176                                 for(unsigned int i = 0; i < triangleCount; i++)
1177                                 {
1178                                         batch[i][0] = index[0];
1179                                         batch[i][1] = index[1];
1180                                         batch[i][2] = index[1];
1181
1182                                         index += 1;
1183                                 }
1184                         }
1185                         break;
1186                 case DRAW_INDEXEDLINESTRIP32:
1187                         {
1188                                 const unsigned int *index = (const unsigned int*)indices + start;
1189
1190                                 for(unsigned int i = 0; i < triangleCount; i++)
1191                                 {
1192                                         batch[i][0] = index[0];
1193                                         batch[i][1] = index[1];
1194                                         batch[i][2] = index[1];
1195
1196                                         index += 1;
1197                                 }
1198                         }
1199                         break;
1200                 case DRAW_INDEXEDLINELOOP8:
1201                         {
1202                                 const unsigned char *index = (const unsigned char*)indices;
1203
1204                                 for(unsigned int i = 0; i < triangleCount; i++)
1205                                 {
1206                                         batch[i][0] = index[(start + i + 0) % loop];
1207                                         batch[i][1] = index[(start + i + 1) % loop];
1208                                         batch[i][2] = index[(start + i + 1) % loop];
1209                                 }
1210                         }
1211                         break;
1212                 case DRAW_INDEXEDLINELOOP16:
1213                         {
1214                                 const unsigned short *index = (const unsigned short*)indices;
1215
1216                                 for(unsigned int i = 0; i < triangleCount; i++)
1217                                 {
1218                                         batch[i][0] = index[(start + i + 0) % loop];
1219                                         batch[i][1] = index[(start + i + 1) % loop];
1220                                         batch[i][2] = index[(start + i + 1) % loop];
1221                                 }
1222                         }
1223                         break;
1224                 case DRAW_INDEXEDLINELOOP32:
1225                         {
1226                                 const unsigned int *index = (const unsigned int*)indices;
1227
1228                                 for(unsigned int i = 0; i < triangleCount; i++)
1229                                 {
1230                                         batch[i][0] = index[(start + i + 0) % loop];
1231                                         batch[i][1] = index[(start + i + 1) % loop];
1232                                         batch[i][2] = index[(start + i + 1) % loop];
1233                                 }
1234                         }
1235                         break;
1236                 case DRAW_INDEXEDTRIANGLELIST8:
1237                         {
1238                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1239
1240                                 for(unsigned int i = 0; i < triangleCount; i++)
1241                                 {
1242                                         batch[i][0] = index[0];
1243                                         batch[i][1] = index[1];
1244                                         batch[i][2] = index[2];
1245
1246                                         index += 3;
1247                                 }
1248                         }
1249                         break;
1250                 case DRAW_INDEXEDTRIANGLELIST16:
1251                         {
1252                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1253
1254                                 for(unsigned int i = 0; i < triangleCount; i++)
1255                                 {
1256                                         batch[i][0] = index[0];
1257                                         batch[i][1] = index[1];
1258                                         batch[i][2] = index[2];
1259
1260                                         index += 3;
1261                                 }
1262                         }
1263                         break;
1264                 case DRAW_INDEXEDTRIANGLELIST32:
1265                         {
1266                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1267
1268                                 for(unsigned int i = 0; i < triangleCount; i++)
1269                                 {
1270                                         batch[i][0] = index[0];
1271                                         batch[i][1] = index[1];
1272                                         batch[i][2] = index[2];
1273
1274                                         index += 3;
1275                                 }
1276                         }
1277                         break;
1278                 case DRAW_INDEXEDTRIANGLESTRIP8:
1279                         {
1280                                 const unsigned char *index = (const unsigned char*)indices + start;
1281
1282                                 for(unsigned int i = 0; i < triangleCount; i++)
1283                                 {
1284                                         batch[i][0] = index[0];
1285                                         batch[i][1] = index[((start + i) & 1) + 1];
1286                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1287
1288                                         index += 1;
1289                                 }
1290                         }
1291                         break;
1292                 case DRAW_INDEXEDTRIANGLESTRIP16:
1293                         {
1294                                 const unsigned short *index = (const unsigned short*)indices + start;
1295
1296                                 for(unsigned int i = 0; i < triangleCount; i++)
1297                                 {
1298                                         batch[i][0] = index[0];
1299                                         batch[i][1] = index[((start + i) & 1) + 1];
1300                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1301
1302                                         index += 1;
1303                                 }
1304                         }
1305                         break;
1306                 case DRAW_INDEXEDTRIANGLESTRIP32:
1307                         {
1308                                 const unsigned int *index = (const unsigned int*)indices + start;
1309
1310                                 for(unsigned int i = 0; i < triangleCount; i++)
1311                                 {
1312                                         batch[i][0] = index[0];
1313                                         batch[i][1] = index[((start + i) & 1) + 1];
1314                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1315
1316                                         index += 1;
1317                                 }
1318                         }
1319                         break;
1320                 case DRAW_INDEXEDTRIANGLEFAN8:
1321                         {
1322                                 const unsigned char *index = (const unsigned char*)indices;
1323
1324                                 for(unsigned int i = 0; i < triangleCount; i++)
1325                                 {
1326                                         batch[i][0] = index[start + i + 1];
1327                                         batch[i][1] = index[start + i + 2];
1328                                         batch[i][2] = index[0];
1329                                 }
1330                         }
1331                         break;
1332                 case DRAW_INDEXEDTRIANGLEFAN16:
1333                         {
1334                                 const unsigned short *index = (const unsigned short*)indices;
1335
1336                                 for(unsigned int i = 0; i < triangleCount; i++)
1337                                 {
1338                                         batch[i][0] = index[start + i + 1];
1339                                         batch[i][1] = index[start + i + 2];
1340                                         batch[i][2] = index[0];
1341                                 }
1342                         }
1343                         break;
1344                 case DRAW_INDEXEDTRIANGLEFAN32:
1345                         {
1346                                 const unsigned int *index = (const unsigned int*)indices;
1347
1348                                 for(unsigned int i = 0; i < triangleCount; i++)
1349                                 {
1350                                         batch[i][0] = index[start + i + 1];
1351                                         batch[i][1] = index[start + i + 2];
1352                                         batch[i][2] = index[0];
1353                                 }
1354                         }
1355                         break;
1356         case DRAW_QUADLIST:
1357                         {
1358                                 unsigned int index = 4 * start / 2;
1359
1360                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1361                                 {
1362                                         batch[i+0][0] = index + 0;
1363                                         batch[i+0][1] = index + 1;
1364                                         batch[i+0][2] = index + 2;
1365
1366                     batch[i+1][0] = index + 0;
1367                                         batch[i+1][1] = index + 2;
1368                                         batch[i+1][2] = index + 3;
1369
1370                                         index += 4;
1371                                 }
1372                         }
1373                         break;
1374                 default:
1375                         ASSERT(false);
1376                 }
1377
1378                 task->vertexCount = triangleCount * 3;
1379                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1380         }
1381
1382         int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1383         {
1384                 Triangle *triangle = renderer->triangleBatch[unit];
1385                 Primitive *primitive = renderer->primitiveBatch[unit];
1386
1387                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1388                 SetupProcessor::State &state = draw.setupState;
1389                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1390
1391                 int ms = state.multiSample;
1392                 int pos = state.positionRegister;
1393                 const DrawData *data = draw.data;
1394                 int visible = 0;
1395
1396                 for(int i = 0; i < count; i++, triangle++)
1397                 {
1398                         Vertex &v0 = triangle->v0;
1399                         Vertex &v1 = triangle->v1;
1400                         Vertex &v2 = triangle->v2;
1401
1402                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1403                         {
1404                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1405
1406                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1407
1408                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1409                                 {
1410                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1411                                         {
1412                                                 continue;
1413                                         }
1414                                 }
1415
1416                                 if(setupRoutine(primitive, triangle, &polygon, data))
1417                                 {
1418                                         primitive += ms;
1419                                         visible++;
1420                                 }
1421                         }
1422                 }
1423
1424                 return visible;
1425         }
1426
1427         int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)   // Sets up the three edges of the unit's first batch triangle as lines; returns how many edges setupLine() accepted
1428         {   // 'count' is not read in this function: exactly triangle[0..2] of the batch are used
1429                 Triangle *triangle = renderer->triangleBatch[unit];
1430                 Primitive *primitive = renderer->primitiveBatch[unit];
1431                 int visible = 0;
1432
1433                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1434                 SetupProcessor::State &state = draw.setupState;
1435                 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;   // Not referenced again in this function
1436
1437                 const Vertex &v0 = triangle[0].v0;
1438                 const Vertex &v1 = triangle[0].v1;
1439                 const Vertex &v2 = triangle[0].v2;
1440                 // d: signed value computed from the x, y and w of the three vertices; its sign drives culling and 0.5f * d is stored as each edge's area
1441                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1442
1443                 if(state.cullMode == CULL_CLOCKWISE)
1444                 {
1445                         if(d >= 0) return 0;
1446                 }
1447                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1448                 {
1449                         if(d <= 0) return 0;
1450                 }
1451
1452                 // Copy attributes: triangle[1] becomes the edge v1-v2 and triangle[2] the edge v2-v0 (triangle[0] already holds v0-v1)
1453                 triangle[1].v0 = v1;
1454                 triangle[1].v1 = v2;
1455                 triangle[2].v0 = v2;
1456                 triangle[2].v1 = v0;
1457                 // When the first color is flat, both endpoints of the two copied edges take C[0] and C[1] from triangle[0].v0
1458                 if(state.color[0][0].flat)   // FIXME
1459                 {
1460                         for(int i = 0; i < 2; i++)
1461                         {
1462                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1463                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1464                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1465                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1466                         }
1467                 }
1468                 // One setupLine() call per edge; 'triangle' always advances, 'primitive' only when the edge was accepted
1469                 for(int i = 0; i < 3; i++)
1470                 {
1471                         if(setupLine(renderer, *primitive, *triangle, draw))
1472                         {
1473                                 primitive->area = 0.5f * d;
1474
1475                                 primitive++;
1476                                 visible++;
1477                         }
1478
1479                         triangle++;
1480                 }
1481
1482                 return visible;
1483         }
1484         
1485         int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1486         {
1487                 Triangle *triangle = renderer->triangleBatch[unit];
1488                 Primitive *primitive = renderer->primitiveBatch[unit];
1489                 int visible = 0;
1490
1491                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1492                 SetupProcessor::State &state = draw.setupState;
1493
1494                 const Vertex &v0 = triangle[0].v0;
1495                 const Vertex &v1 = triangle[0].v1;
1496                 const Vertex &v2 = triangle[0].v2;
1497
1498                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1499
1500                 if(state.cullMode == CULL_CLOCKWISE)
1501                 {
1502                         if(d >= 0) return 0;
1503                 }
1504                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1505                 {
1506                         if(d <= 0) return 0;
1507                 }
1508
1509                 // Copy attributes
1510                 triangle[1].v0 = v1;
1511                 triangle[2].v0 = v2;
1512
1513                 for(int i = 0; i < 3; i++)
1514                 {
1515                         if(setupPoint(renderer, *primitive, *triangle, draw))
1516                         {
1517                                 primitive->area = 0.5f * d;
1518
1519                                 primitive++;
1520                                 visible++;
1521                         }
1522
1523                         triangle++;
1524                 }
1525
1526                 return visible;
1527         }
1528
1529         int Renderer::setupLines(Renderer *renderer, int unit, int count)
1530         {
1531                 Triangle *triangle = renderer->triangleBatch[unit];
1532                 Primitive *primitive = renderer->primitiveBatch[unit];
1533                 int visible = 0;
1534
1535                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1536                 SetupProcessor::State &state = draw.setupState;
1537
1538                 int ms = state.multiSample;
1539
1540                 for(int i = 0; i < count; i++)
1541                 {
1542                         if(setupLine(renderer, *primitive, *triangle, draw))
1543                         {
1544                                 primitive += ms;
1545                                 visible++;
1546                         }
1547
1548                         triangle++;
1549                 }
1550
1551                 return visible;
1552         }
1553
1554         int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1555         {
1556                 Triangle *triangle = renderer->triangleBatch[unit];
1557                 Primitive *primitive = renderer->primitiveBatch[unit];
1558                 int visible = 0;
1559
1560                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1561                 SetupProcessor::State &state = draw.setupState;
1562
1563                 int ms = state.multiSample;
1564
1565                 for(int i = 0; i < count; i++)
1566                 {
1567                         if(setupPoint(renderer, *primitive, *triangle, draw))
1568                         {
1569                                 primitive += ms;
1570                                 visible++;
1571                         }
1572
1573                         triangle++;
1574                 }
1575
1576                 return visible;
1577         }
1578
1579         bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)   // Builds a polygon around the segment triangle.v0 - triangle.v1, clips it if needed and runs the draw's setup routine; returns false when the line is rejected
1580         {
1581                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1582                 const SetupProcessor::State &state = draw.setupState;
1583                 const DrawData &data = *draw.data;
1584
1585                 float lineWidth = data.lineWidth;
1586
1587                 Vertex &v0 = triangle.v0;
1588                 Vertex &v1 = triangle.v1;
1589
1590                 int pos = state.positionRegister;
1591
1592                 const float4 &P0 = v0.v[pos];
1593                 const float4 &P1 = v1.v[pos];
1594                 // Reject only when neither endpoint has a positive w
1595                 if(P0.w <= 0 && P1.w <= 0)
1596                 {
1597                         return false;
1598                 }
1599                 // NOTE(review): W and H are Wx16[0] / Hx16[0] divided by 16; presumably viewport scale factors in pixels - confirm where DrawData is filled in
1600                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1601                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1602                 // Difference between the endpoints after dividing x and y by w, scaled by W and H
1603                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1604                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1605                 // Reject lines whose scaled endpoints coincide
1606                 if(dx == 0 && dy == 0)
1607                 {
1608                         return false;
1609                 }
1610
1611                 if(false)   // Rectangle (disabled: the condition is constant false, so this branch never runs)
1612                 {
1613                         float4 P[4];
1614                         int C[4];
1615
1616                         P[0] = P0;
1617                         P[1] = P1;
1618                         P[2] = P1;
1619                         P[3] = P0;
1620
1621                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1622
1623                         dx *= scale;
1624                         dy *= scale;
1625
1626                         float dx0w = dx * P0.w / W;
1627                         float dy0h = dy * P0.w / H;
1628                         float dx0h = dx * P0.w / H;
1629                         float dy0w = dy * P0.w / W;
1630
1631                         float dx1w = dx * P1.w / W;
1632                         float dy1h = dy * P1.w / H;
1633                         float dx1h = dx * P1.w / H;
1634                         float dy1w = dy * P1.w / W;
1635
1636                         P[0].x += -dy0w + -dx0w;
1637                         P[0].y += -dx0h + +dy0h;
1638                         C[0] = computeClipFlags(P[0], data);
1639
1640                         P[1].x += -dy1w + +dx1w;
1641                         P[1].y += -dx1h + +dy1h;
1642                         C[1] = computeClipFlags(P[1], data);
1643
1644                         P[2].x += +dy1w + +dx1w;
1645                         P[2].y += +dx1h + -dy1h;
1646                         C[2] = computeClipFlags(P[2], data);
1647
1648                         P[3].x += +dy0w + -dx0w;
1649                         P[3].y += +dx0h + +dy0h;
1650                         C[3] = computeClipFlags(P[3], data);
1651
1652                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1653                         {
1654                                 Polygon polygon(P, 4);
1655
1656                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1657
1658                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1659                                 {
1660                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1661                                         {
1662                                                 return false;
1663                                         }
1664                                 }
1665
1666                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1667                         }
1668                 }
1669                 else   // Diamond test convention
1670                 {
1671                         float4 P[8];
1672                         int C[8];
1673                         // P[0..3] start as copies of P0 and P[4..7] as copies of P1; each is then displaced along one axis below
1674                         P[0] = P0;
1675                         P[1] = P0;
1676                         P[2] = P0;
1677                         P[3] = P0;
1678                         P[4] = P1;
1679                         P[5] = P1;
1680                         P[6] = P1;
1681                         P[7] = P1;
1682                         // Half the line width, multiplied by the endpoint's w and divided by W (x offset) or H (y offset)
1683                         float dx0 = lineWidth * 0.5f * P0.w / W;
1684                         float dy0 = lineWidth * 0.5f * P0.w / H;
1685
1686                         float dx1 = lineWidth * 0.5f * P1.w / W;
1687                         float dy1 = lineWidth * 0.5f * P1.w / H;
1688                         // Displacement order around each endpoint: -x, +y, +x, -y; clip flags are computed for every displaced point
1689                         P[0].x += -dx0;
1690                         C[0] = computeClipFlags(P[0], data);
1691
1692                         P[1].y += +dy0;
1693                         C[1] = computeClipFlags(P[1], data);
1694
1695                         P[2].x += +dx0;
1696                         C[2] = computeClipFlags(P[2], data);
1697
1698                         P[3].y += -dy0;
1699                         C[3] = computeClipFlags(P[3], data);
1700
1701                         P[4].x += -dx1;
1702                         C[4] = computeClipFlags(P[4], data);
1703
1704                         P[5].y += +dy1;
1705                         C[5] = computeClipFlags(P[5], data);
1706
1707                         P[6].x += +dx1;
1708                         C[6] = computeClipFlags(P[6], data);
1709
1710                         P[7].y += -dy1;
1711                         C[7] = computeClipFlags(P[7], data);
1712                         // Continue only when CLIP_FINITE is exactly the set of flags shared by all eight points
1713                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1714                         {
1715                                 float4 L[6];
1716                                 // Six of the eight points form the polygon; which six depends on how dx compares with dy and -dy
1717                                 if(dx > -dy)
1718                                 {
1719                                         if(dx > dy)   // Right
1720                                         {
1721                                                 L[0] = P[0];
1722                                                 L[1] = P[1];
1723                                                 L[2] = P[5];
1724                                                 L[3] = P[6];
1725                                                 L[4] = P[7];
1726                                                 L[5] = P[3];
1727                                         }
1728                                         else   // Down
1729                                         {
1730                                                 L[0] = P[0];
1731                                                 L[1] = P[4];
1732                                                 L[2] = P[5];
1733                                                 L[3] = P[6];
1734                                                 L[4] = P[2];
1735                                                 L[5] = P[3];
1736                                         }
1737                                 }
1738                                 else
1739                                 {
1740                                         if(dx > dy)   // Up
1741                                         {
1742                                                 L[0] = P[0];
1743                                                 L[1] = P[1];
1744                                                 L[2] = P[2];
1745                                                 L[3] = P[6];
1746                                                 L[4] = P[7];
1747                                                 L[5] = P[4];
1748                                         }
1749                                         else   // Left
1750                                         {
1751                                                 L[0] = P[1];
1752                                                 L[1] = P[2];
1753                                                 L[2] = P[3];
1754                                                 L[3] = P[7];
1755                                                 L[4] = P[4];
1756                                                 L[5] = P[5];
1757                                         }
1758                                 }
1759
1760                                 Polygon polygon(L, 6);
1761
1762                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1763                                 // The clipper is only consulted when some flag other than CLIP_FINITE is set
1764                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1765                                 {
1766                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1767                                         {
1768                                                 return false;
1769                                         }
1770                                 }
1771
1772                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1773                         }
1774                 }
1775                 // Reached when the shared clip-flag test above fails
1776                 return false;
1777         }
1778
1779         bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)   // Builds a four-point polygon around triangle.v0 sized by the point size, clips it if needed and runs the draw's setup routine; returns false when the point is rejected
1780         {
1781                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1782                 const SetupProcessor::State &state = draw.setupState;
1783                 const DrawData &data = *draw.data;
1784
1785                 Vertex &v = triangle.v0;
1786
1787                 float pSize;
1788
1789                 int pts = state.pointSizeRegister;
1790                 // Per-vertex size (y component of register 'pts') unless the register index is 0xF, in which case the draw-wide point size is used
1791                 if(state.pointSizeRegister != 0xF)
1792                 {
1793                         pSize = v.v[pts].y;
1794                 }
1795                 else
1796                 {
1797                         pSize = data.point.pointSize[0];
1798                 }
1799
1800                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1801
1802                 float4 P[4];
1803                 int C[4];
1804
1805                 int pos = state.positionRegister;
1806                 // All four corners start at the vertex position
1807                 P[0] = v.v[pos];
1808                 P[1] = v.v[pos];
1809                 P[2] = v.v[pos];
1810                 P[3] = v.v[pos];
1811                 // Corner offsets: point size times the position's w times halfPixelX[0] / halfPixelY[0]
1812                 const float X = pSize * P[0].w * data.halfPixelX[0];
1813                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1814                 // Corners in order (-X, +Y), (+X, +Y), (+X, -Y), (-X, -Y); clip flags are computed for each
1815                 P[0].x -= X;
1816                 P[0].y += Y;
1817                 C[0] = computeClipFlags(P[0], data);
1818
1819                 P[1].x += X;
1820                 P[1].y += Y;
1821                 C[1] = computeClipFlags(P[1], data);
1822
1823                 P[2].x += X;
1824                 P[2].y -= Y;
1825                 C[2] = computeClipFlags(P[2], data);
1826
1827                 P[3].x -= X;
1828                 P[3].y -= Y;
1829                 C[3] = computeClipFlags(P[3], data);
1830                 // v1 and v2 are overwritten with copies of v0, then displaced in X and Y respectively by iround(16 * 0.5f * pSize)
1831                 triangle.v1 = triangle.v0;
1832                 triangle.v2 = triangle.v0;
1833
1834                 triangle.v1.X += iround(16 * 0.5f * pSize);
1835                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1836
1837                 Polygon polygon(P, 4);
1838                 // Continue only when CLIP_FINITE is exactly the set of flags shared by all four corners
1839                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1840                 {
1841                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1842
1843                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1844                         {
1845                                 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1846                                 {
1847                                         return false;
1848                                 }
1849                         }
1850                         
1851                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1852                 }
1853
1854                 return false;
1855         }
1856
1857         unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1858         {
1859                 float clX = v.x + data.halfPixelX[0] * v.w;
1860                 float clY = v.y + data.halfPixelY[0] * v.w;
1861
1862                 return ((clX > v.w)  << 0) |
1863                            ((clY > v.w)  << 1) |
1864                            ((v.z > v.w)  << 2) |
1865                            ((clX < -v.w) << 3) |
1866                        ((clY < -v.w) << 4) |
1867                            ((v.z < 0)    << 5) |
1868                            Clipper::CLIP_FINITE;   // FIXME: xyz finite
1869         }
1870
1871         void Renderer::initializeThreads()   // Allocates the per-unit batches and per-thread vertex tasks, then creates one worker thread per thread index
1872         {
1873                 unitCount = ceilPow2(threadCount);
1874                 clusterCount = ceilPow2(threadCount);
1875                 // One triangle batch and one primitive batch of batchSize entries per unit
1876                 for(int i = 0; i < unitCount; i++)
1877                 {
1878                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1879                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1880                 }
1881
1882                 for(int i = 0; i < threadCount; i++)
1883                 {
1884                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1885                         vertexTask[i]->vertexCache.drawCall = -1;
1886
1887                         task[i].type = Task::SUSPEND;
1888
1889                         resume[i] = new Event();
1890                         suspend[i] = new Event();
1891                         // 'parameters' lives only for this iteration and is handed to the new thread by address
1892                         Parameters parameters;
1893                         parameters.threadIndex = i;
1894                         parameters.renderer = this;
1895
1896                         exitThreads = false;
1897                         worker[i] = new Thread(threadFunction, &parameters);
1898                         // NOTE(review): presumably waits until the worker has started and read 'parameters', then re-signals the event - confirm against threadFunction
1899                         suspend[i]->wait();
1900                         suspend[i]->signal();
1901                 }
1902         }
1903
1904         void Renderer::terminateThreads()   // Stops and deletes the worker threads and their events, then frees the vertex tasks and batch memory
1905         {
1906                 while(threadsAwake != 0)   // Poll, sleeping between checks, until no thread is counted as awake
1907                 {
1908                         Thread::sleep(1);
1909                 }
1910
1911                 for(int thread = 0; thread < threadCount; thread++)
1912                 {
1913                         if(worker[thread])
1914                         {
1915                                 exitThreads = true;   // Set before resuming the worker, which is then joined
1916                                 resume[thread]->signal();
1917                                 worker[thread]->join();
1918                                 
1919                                 delete worker[thread];
1920                                 worker[thread] = 0;
1921                                 delete resume[thread];
1922                                 resume[thread] = 0;
1923                                 delete suspend[thread];
1924                                 suspend[thread] = 0;
1925                         }
1926                 
1927                         deallocate(vertexTask[thread]);
1928                         vertexTask[thread] = 0;
1929                 }
1930                 // Frees 16 batch slots regardless of unitCount - assumes both arrays hold at least 16 entries; TODO confirm against the declarations
1931                 for(int i = 0; i < 16; i++)
1932                 {
1933                         deallocate(triangleBatch[i]);
1934                         triangleBatch[i] = 0;
1935
1936                         deallocate(primitiveBatch[i]);
1937                         primitiveBatch[i] = 0;
1938                 }
1939         }
1940
1941         void Renderer::loadConstants(const VertexShader *vertexShader)
1942         {
1943                 if(!vertexShader) return;
1944
1945                 size_t count = vertexShader->getLength();
1946
1947                 for(size_t i = 0; i < count; i++)
1948                 {
1949                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
1950
1951                         if(instruction->opcode == Shader::OPCODE_DEF)
1952                         {
1953                                 int index = instruction->dst.index;
1954                                 float value[4];
1955
1956                                 value[0] = instruction->src[0].value[0];
1957                                 value[1] = instruction->src[0].value[1];
1958                                 value[2] = instruction->src[0].value[2];
1959                                 value[3] = instruction->src[0].value[3];
1960
1961                                 setVertexShaderConstantF(index, value);
1962                         }
1963                         else if(instruction->opcode == Shader::OPCODE_DEFI)
1964                         {
1965                                 int index = instruction->dst.index;
1966                                 int integer[4];
1967
1968                                 integer[0] = instruction->src[0].integer[0];
1969                                 integer[1] = instruction->src[0].integer[1];
1970                                 integer[2] = instruction->src[0].integer[2];
1971                                 integer[3] = instruction->src[0].integer[3];
1972
1973                                 setVertexShaderConstantI(index, integer);
1974                         }
1975                         else if(instruction->opcode == Shader::OPCODE_DEFB)
1976                         {
1977                                 int index = instruction->dst.index;
1978                                 int boolean = instruction->src[0].boolean[0];
1979
1980                                 setVertexShaderConstantB(index, &boolean);
1981                         }
1982                 }
1983         }
1984
1985         void Renderer::loadConstants(const PixelShader *pixelShader)
1986         {
1987                 if(!pixelShader) return;
1988
1989                 size_t count = pixelShader->getLength();
1990
1991                 for(size_t i = 0; i < count; i++)
1992                 {
1993                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
1994
1995                         if(instruction->opcode == Shader::OPCODE_DEF)
1996                         {
1997                                 int index = instruction->dst.index;
1998                                 float value[4];
1999
2000                                 value[0] = instruction->src[0].value[0];
2001                                 value[1] = instruction->src[0].value[1];
2002                                 value[2] = instruction->src[0].value[2];
2003                                 value[3] = instruction->src[0].value[3];
2004
2005                                 setPixelShaderConstantF(index, value);
2006                         }
2007                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2008                         {
2009                                 int index = instruction->dst.index;
2010                                 int integer[4];
2011
2012                                 integer[0] = instruction->src[0].integer[0];
2013                                 integer[1] = instruction->src[0].integer[1];
2014                                 integer[2] = instruction->src[0].integer[2];
2015                                 integer[3] = instruction->src[0].integer[3];
2016
2017                                 setPixelShaderConstantI(index, integer);
2018                         }
2019                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2020                         {
2021                                 int index = instruction->dst.index;
2022                                 int boolean = instruction->src[0].boolean[0];
2023
2024                                 setPixelShaderConstantB(index, &boolean);
2025                         }
2026                 }
2027         }
2028
2029         void Renderer::setIndexBuffer(Resource *indexBuffer)   // Stores the given resource pointer as the context's index buffer
2030         {
2031                 context->indexBuffer = indexBuffer;
2032         }
2033
2034         void Renderer::setMultiSampleMask(unsigned int mask)   // Stores 'mask' as the context's sample mask
2035         {
2036                 context->sampleMask = mask;
2037         }
2038
2039         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)   // Writes the namespace-level sw::transparencyAntialiasing setting (the parameter shadows it, hence the qualification)
2040         {
2041                 sw::transparencyAntialiasing = transparencyAntialiasing;
2042         }
2043
2044         bool Renderer::isReadWriteTexture(int sampler)
2045         {
2046                 for(int index = 0; index < 4; index++)
2047                 {
2048                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2049                         {
2050                                 return true;
2051                         }
2052                 }
2053         
2054                 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
2055                 {
2056                         return true;
2057                 }
2058
2059                 return false;
2060         }
2061         
2062         void Renderer::updateClipper()
2063         {
2064                 if(updateClipPlanes)
2065                 {
2066                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2067                         {
2068                                 const Matrix &scissorWorld = getViewTransform();
2069
2070                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2071                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2072                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2073                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2074                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2075                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2076                         }
2077                         else   // User plane in clip space
2078                         {
2079                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2080                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2081                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2082                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2083                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2084                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2085                         }
2086
2087                         updateClipPlanes = false;
2088                 }
2089         }
2090
2091         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2092         {
2093                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2094
2095                 context->texture[sampler] = resource;
2096         }
2097
2098         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2099         {
2100                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2101                 
2102                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2103         }
2104
2105         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2106         {
2107                 if(type == SAMPLER_PIXEL)
2108                 {
2109                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2110                 }
2111                 else
2112                 {
2113                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2114                 }
2115         }
2116
2117         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2118         {
2119                 if(type == SAMPLER_PIXEL)
2120                 {
2121                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2122                 }
2123                 else
2124                 {
2125                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2126                 }
2127         }
2128
2129         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2130         {
2131                 if(type == SAMPLER_PIXEL)
2132                 {
2133                         PixelProcessor::setGatherEnable(sampler, enable);
2134                 }
2135                 else
2136                 {
2137                         VertexProcessor::setGatherEnable(sampler, enable);
2138                 }
2139         }
2140
2141         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2142         {
2143                 if(type == SAMPLER_PIXEL)
2144                 {
2145                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2146                 }
2147                 else
2148                 {
2149                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2150                 }
2151         }
2152
2153         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2154         {
2155                 if(type == SAMPLER_PIXEL)
2156                 {
2157                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2158                 }
2159                 else
2160                 {
2161                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2162                 }
2163         }
2164
2165         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2166         {
2167                 if(type == SAMPLER_PIXEL)
2168                 {
2169                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2170                 }
2171                 else
2172                 {
2173                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2174                 }
2175         }
2176
2177         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2178         {
2179                 if(type == SAMPLER_PIXEL)
2180                 {
2181                         PixelProcessor::setReadSRGB(sampler, sRGB);
2182                 }
2183                 else
2184                 {
2185                         VertexProcessor::setReadSRGB(sampler, sRGB);
2186                 }
2187         }
2188
2189         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2190         {
2191                 if(type == SAMPLER_PIXEL)
2192                 {
2193                         PixelProcessor::setMipmapLOD(sampler, bias);
2194                 }
2195                 else
2196                 {
2197                         VertexProcessor::setMipmapLOD(sampler, bias);
2198                 }
2199         }
2200
2201         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2202         {
2203                 if(type == SAMPLER_PIXEL)
2204                 {
2205                         PixelProcessor::setBorderColor(sampler, borderColor);
2206                 }
2207                 else
2208                 {
2209                         VertexProcessor::setBorderColor(sampler, borderColor);
2210                 }
2211         }
2212
2213         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2214         {
2215                 if(type == SAMPLER_PIXEL)
2216                 {
2217                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2218                 }
2219                 else
2220                 {
2221                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2222                 }
2223         }
2224
2225         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2226         {
2227                 context->setPointSpriteEnable(pointSpriteEnable);
2228         }
2229
2230         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2231         {
2232                 context->setPointScaleEnable(pointScaleEnable);
2233         }
2234
2235         void Renderer::setLineWidth(float width)
2236         {
2237                 context->lineWidth = width;
2238         }
2239
2240         void Renderer::setDepthBias(float bias)
2241         {
2242                 depthBias = bias;
2243         }
2244
2245         void Renderer::setSlopeDepthBias(float slopeBias)
2246         {
2247                 slopeDepthBias = slopeBias;
2248         }
2249
2250         void Renderer::setPixelShader(const PixelShader *shader)
2251         {
2252                 context->pixelShader = shader;
2253
2254                 loadConstants(shader);
2255         }
2256
2257         void Renderer::setVertexShader(const VertexShader *shader)
2258         {
2259                 context->vertexShader = shader;
2260
2261                 loadConstants(shader);
2262         }
2263
2264         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2265         {
2266                 for(int i = 0; i < DRAW_COUNT; i++)
2267                 {
2268                         if(drawCall[i]->psDirtyConstF < index + count)
2269                         {
2270                                 drawCall[i]->psDirtyConstF = index + count;
2271                         }
2272                 }
2273
2274                 for(int i = 0; i < count; i++)
2275                 {
2276                         PixelProcessor::setFloatConstant(index + i, value);
2277                         value += 4;
2278                 }
2279         }
2280
2281         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2282         {
2283                 for(int i = 0; i < DRAW_COUNT; i++)
2284                 {
2285                         if(drawCall[i]->psDirtyConstI < index + count)
2286                         {
2287                                 drawCall[i]->psDirtyConstI = index + count;
2288                         }
2289                 }
2290
2291                 for(int i = 0; i < count; i++)
2292                 {
2293                         PixelProcessor::setIntegerConstant(index + i, value);
2294                         value += 4;
2295                 }
2296         }
2297
2298         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2299         {
2300                 for(int i = 0; i < DRAW_COUNT; i++)
2301                 {
2302                         if(drawCall[i]->psDirtyConstB < index + count)
2303                         {
2304                                 drawCall[i]->psDirtyConstB = index + count;
2305                         }
2306                 }
2307
2308                 for(int i = 0; i < count; i++)
2309                 {
2310                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2311                         boolean++;
2312                 }
2313         }
2314
2315         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2316         {
2317                 for(int i = 0; i < DRAW_COUNT; i++)
2318                 {
2319                         if(drawCall[i]->vsDirtyConstF < index + count)
2320                         {
2321                                 drawCall[i]->vsDirtyConstF = index + count;
2322                         }
2323                 }
2324
2325                 for(int i = 0; i < count; i++)
2326                 {
2327                         VertexProcessor::setFloatConstant(index + i, value);
2328                         value += 4;
2329                 }
2330         }
2331
2332         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2333         {
2334                 for(int i = 0; i < DRAW_COUNT; i++)
2335                 {
2336                         if(drawCall[i]->vsDirtyConstI < index + count)
2337                         {
2338                                 drawCall[i]->vsDirtyConstI = index + count;
2339                         }
2340                 }
2341
2342                 for(int i = 0; i < count; i++)
2343                 {
2344                         VertexProcessor::setIntegerConstant(index + i, value);
2345                         value += 4;
2346                 }
2347         }
2348
2349         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2350         {
2351                 for(int i = 0; i < DRAW_COUNT; i++)
2352                 {
2353                         if(drawCall[i]->vsDirtyConstB < index + count)
2354                         {
2355                                 drawCall[i]->vsDirtyConstB = index + count;
2356                         }
2357                 }
2358
2359                 for(int i = 0; i < count; i++)
2360                 {
2361                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2362                         boolean++;
2363                 }
2364         }
2365
2366         void Renderer::setModelMatrix(const Matrix &M, int i)
2367         {
2368                 VertexProcessor::setModelMatrix(M, i);
2369         }
2370
2371         void Renderer::setViewMatrix(const Matrix &V)
2372         {
2373                 VertexProcessor::setViewMatrix(V);
2374                 updateClipPlanes = true;
2375         }
2376
2377         void Renderer::setBaseMatrix(const Matrix &B)
2378         {
2379                 VertexProcessor::setBaseMatrix(B);
2380                 updateClipPlanes = true;
2381         }
2382
2383         void Renderer::setProjectionMatrix(const Matrix &P)
2384         {
2385                 VertexProcessor::setProjectionMatrix(P);
2386                 updateClipPlanes = true;
2387         }
2388
2389         void Renderer::addQuery(Query *query)
2390         {
2391                 queries.push_back(query);
2392         }
2393         
2394         void Renderer::removeQuery(Query *query)
2395         {
2396                 queries.remove(query);
2397         }
2398
#if PERF_HUD
        // Returns the renderer's current threadCount.
        int Renderer::getThreadCount()
        {
                return threadCount;
        }

        // Returns the vertexTime entry of the given thread.
        // The thread index is not range-checked here.
        int64_t Renderer::getVertexTime(int thread)
        {
                return vertexTime[thread];
        }

        // Returns the setupTime entry of the given thread.
        // The thread index is not range-checked here.
        int64_t Renderer::getSetupTime(int thread)
        {
                return setupTime[thread];
        }

        // Returns the pixelTime entry of the given thread.
        // The thread index is not range-checked here.
        int64_t Renderer::getPixelTime(int thread)
        {
                return pixelTime[thread];
        }

        // Zeroes the vertex, setup and pixel timers of the first threadCount threads.
        void Renderer::resetTimers()
        {
                for(int t = 0; t < threadCount; t++)
                {
                        vertexTime[t] = 0;
                        setupTime[t] = 0;
                        pixelTime[t] = 0;
                }
        }
#endif
2430
2431         void Renderer::setViewport(const Viewport &viewport)
2432         {
2433                 this->viewport = viewport;
2434         }
2435
2436         void Renderer::setScissor(const Rect &scissor)
2437         {
2438                 this->scissor = scissor;
2439         }
2440
2441         void Renderer::setClipFlags(int flags)
2442         {
2443                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2444         }
2445
2446         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2447         {
2448                 if(index < 6)
2449                 {
2450                         userPlane[index] = plane;
2451                 }
2452                 else ASSERT(false);
2453
2454                 updateClipPlanes = true;
2455         }
2456
2457         void Renderer::updateConfiguration(bool initialUpdate)
2458         {
2459                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2460
2461                 if(newConfiguration || initialUpdate)
2462                 {
2463                         terminateThreads();
2464
2465                         SwiftConfig::Configuration configuration = {0};
2466                         swiftConfig->getConfiguration(configuration);
2467
2468                         precacheVertex = !newConfiguration && configuration.precache;
2469                         precacheSetup = !newConfiguration && configuration.precache;
2470                         precachePixel = !newConfiguration && configuration.precache;
2471
2472                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2473                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2474                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2475
2476                         switch(configuration.textureSampleQuality)
2477                         {
2478                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2479                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2480                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2481                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2482                         }
2483
2484                         switch(configuration.mipmapQuality)
2485                         {
2486                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2487                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2488                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2489                         }
2490
2491                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2492
2493                         switch(configuration.transcendentalPrecision)
2494                         {
2495                         case 0:
2496                                 logPrecision = APPROXIMATE;
2497                                 expPrecision = APPROXIMATE;
2498                                 rcpPrecision = APPROXIMATE;
2499                                 rsqPrecision = APPROXIMATE;
2500                                 break;
2501                         case 1:
2502                                 logPrecision = PARTIAL;
2503                                 expPrecision = PARTIAL;
2504                                 rcpPrecision = PARTIAL;
2505                                 rsqPrecision = PARTIAL;
2506                                 break;
2507                         case 2:
2508                                 logPrecision = ACCURATE;
2509                                 expPrecision = ACCURATE;
2510                                 rcpPrecision = ACCURATE;
2511                                 rsqPrecision = ACCURATE;
2512                                 break;
2513                         case 3:
2514                                 logPrecision = WHQL;
2515                                 expPrecision = WHQL;
2516                                 rcpPrecision = WHQL;
2517                                 rsqPrecision = WHQL;
2518                                 break;
2519                         case 4:
2520                                 logPrecision = IEEE;
2521                                 expPrecision = IEEE;
2522                                 rcpPrecision = IEEE;
2523                                 rsqPrecision = IEEE;
2524                                 break;
2525                         default:
2526                                 logPrecision = ACCURATE;
2527                                 expPrecision = ACCURATE;
2528                                 rcpPrecision = ACCURATE;
2529                                 rsqPrecision = ACCURATE;
2530                                 break;
2531                         }
2532
2533                         switch(configuration.transparencyAntialiasing)
2534                         {
2535                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2536                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2537                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2538                         }
2539
2540                         switch(configuration.threadCount)
2541                         {
2542                         case -1: threadCount = CPUID::coreCount();        break;
2543                         case 0:  threadCount = CPUID::processAffinity();  break;
2544                         default: threadCount = configuration.threadCount; break;
2545                         }
2546
2547                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2548                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2549                         CPUID::setEnableSSE3(configuration.enableSSE3);
2550                         CPUID::setEnableSSE2(configuration.enableSSE2);
2551                         CPUID::setEnableSSE(configuration.enableSSE);
2552
2553                         for(int pass = 0; pass < 10; pass++)
2554                         {
2555                                 optimization[pass] = configuration.optimization[pass];
2556                         }
2557
2558                         forceWindowed = configuration.forceWindowed;
2559                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2560                         postBlendSRGB = configuration.postBlendSRGB;
2561                         exactColorRounding = configuration.exactColorRounding;
2562                         forceClearRegisters = configuration.forceClearRegisters;
2563
2564                 #ifndef NDEBUG
2565                         minPrimitives = configuration.minPrimitives;
2566                         maxPrimitives = configuration.maxPrimitives;
2567                 #endif
2568                 }
2569
2570                 if(!initialUpdate && !worker[0])
2571                 {
2572                         initializeThreads();
2573                 }
2574         }
2575 }