OSDN Git Service

Blitter clear implementation
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2012 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "Renderer.hpp"
13
14 #include "Clipper.hpp"
15 #include "Math.hpp"
16 #include "FrameBuffer.hpp"
17 #include "Timer.hpp"
18 #include "Surface.hpp"
19 #include "Half.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
24 #include "CPUID.hpp"
25 #include "Memory.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
28 #include "Debug.hpp"
29 #include "Reactor/Reactor.hpp"
30
31 #include <malloc.h>
32
33 #undef max
34
35 bool disableServer = true;   // Handed to the SwiftConfig constructor by Renderer::Renderer()
36
37 #ifndef NDEBUG
   // Debug builds only: Renderer::draw() returns immediately when its 'count'
   // argument is below minPrimitives or above maxPrimitives.
38 unsigned int minPrimitives = 1;
39 unsigned int maxPrimitives = 1 << 21;
40 #endif
41
42 namespace sw
43 {
44         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
45         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
46         extern bool booleanFaceRegister;
47         extern bool fullPixelPositionRegister;
48         extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
49         extern bool secondaryColor;             // Specular lighting is applied after texturing
           // The six flags above, plus exactColorRounding below, are overwritten by
           // the Renderer constructor from its 'conventions' / 'exactColorRounding' arguments.
50
51         extern bool forceWindowed;
52         extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near
53         extern bool postBlendSRGB;
54         extern bool exactColorRounding;
55         extern TransparencyAntialiasing transparencyAntialiasing;
56         extern bool forceClearRegisters;
57
58         extern bool precacheVertex;
59         extern bool precacheSetup;
60         extern bool precachePixel;
61
62         int batchSize = 128;      // Renderer::draw() uses batchSize / multisample count as the per-batch size
63         int threadCount = 1;      // Renderer::draw() runs the task loop on the calling thread unless this is > 1
64         int unitCount = 1;        // Upper bound when iterating primitiveProgress[] in findAvailableTasks()
65         int clusterCount = 1;     // Upper bound when iterating pixelProgress[] and per-cluster draw data
66
67         TranscendentalPrecision logPrecision = ACCURATE;   // Compared against IEEE in Renderer::threadFunction()
68         TranscendentalPrecision expPrecision = ACCURATE;
69         TranscendentalPrecision rcpPrecision = ACCURATE;
70         TranscendentalPrecision rsqPrecision = ACCURATE;
71         bool perspectiveCorrection = true;
72
73         struct Parameters
74         {
                   // Argument bundle that Renderer::threadFunction() receives through its
                   // void* parameter and unpacks before entering the thread loop.
75                 Renderer *renderer;   // Renderer whose threadLoop() the thread runs
76                 int threadIndex;      // Index passed on to threadLoop()
77         };
78
79         DrawCall::DrawCall()
80         {
                   // Puts the draw call in its "free" state and allocates its DrawData block.
81                 queries = 0;
82
                   // The dirty counts are the number of shader constants Renderer::draw()
                   // copies into 'data' before resetting the count to 0.
83                 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
84                 vsDirtyConstI = 16;
85                 vsDirtyConstB = 16;
86
87                 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
88                 psDirtyConstI = 16;
89                 psDirtyConstB = 16;
90
                   // -1 is the value Renderer::draw() looks for when searching for an unused DrawCall
91                 references = -1;
92
                   // Obtained with allocate(); released with deallocate() in the destructor
93                 data = (DrawData*)allocate(sizeof(DrawData));
94                 data->constants = &constants;
95         }
96
97         DrawCall::~DrawCall()
98         {
                   // 'queries' is either 0 (set by the constructor) or a list created with new
                   // in Renderer::draw(); deleting a null pointer is a no-op.
99                 delete queries;
100
                    // Matches the allocate() call in the constructor
101                 deallocate(data);
102         }
103
104         Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : context(context), VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), viewport()
105         {
                    // Publishes the API conventions through the sw:: globals, resets all
                    // scheduling state, creates the pool of DrawCall objects and then
                    // applies the configuration.
106                 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
107                 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
108                 sw::booleanFaceRegister = conventions.booleanFaceRegister;
109                 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
110                 sw::leadingVertexFirst = conventions.leadingVertexFirst;
111                 sw::secondaryColor = conventions.secondaryColor;
112                 sw::exactColorRounding = exactColorRounding;
113
114                 setRenderTarget(0, 0);
115                 clipper = new Clipper();
116
                    // Mark all derived matrix / clip-plane state as needing recomputation
117                 updateViewMatrix = true;
118                 updateBaseMatrix = true;
119                 updateProjectionMatrix = true;
120                 updateClipPlanes = true;
121
122                 #if PERF_HUD
123                         resetTimers();
124                 #endif
125
                    // No worker threads or their events exist yet; all 16 slots start empty
126                 for(int i = 0; i < 16; i++)
127                 {
128                         vertexTask[i] = 0;
129
130                         worker[i] = 0;
131                         resume[i] = 0;
132                         suspend[i] = 0;
133                 }
134
135                 threadsAwake = 0;
                    // Renderer::draw() waits on this event when no DrawCall is free
136                 resumeApp = new Event();
137
138                 currentDraw = 0;
139                 nextDraw = 0;
140
141                 qHead = 0;
142                 qSize = 0;
143
144                 for(int i = 0; i < 16; i++)
145                 {
146                         triangleBatch[i] = 0;
147                         primitiveBatch[i] = 0;
148                 }
149
                    // Pool of draw calls; drawList initially refers to them in index order
150                 for(int draw = 0; draw < DRAW_COUNT; draw++)
151                 {
152                         drawCall[draw] = new DrawCall();
153                         drawList[draw] = drawCall[draw];
154                 }
155
156                 for(int unit = 0; unit < 16; unit++)
157                 {
158                         primitiveProgress[unit].init();
159                 }
160
161                 for(int cluster = 0; cluster < 16; cluster++)
162                 {
163                         pixelProgress[cluster].init();
164                 }
165
166                 clipFlags = 0;
167
                    // Uses the file-level disableServer flag, then applies the configuration
                    // (argument 'true' — NOTE(review): presumably "initial update"; confirm in updateConfiguration)
168                 swiftConfig = new SwiftConfig(disableServer);
169                 updateConfiguration(true);
170
                    // Resource that Renderer::draw() locks with sw::PRIVATE for each submitted draw
171                 sync = new Resource(0);
172         }
173
174         Renderer::~Renderer()
175         {
                    // The sync Resource is released through destruct() rather than delete
176                 sync->destruct();
177
178                 delete clipper;
179                 clipper = 0;
180
                    // Threads are terminated before the event and the draw calls they may use are deleted
181                 terminateThreads();
182                 delete resumeApp;
183
184                 for(int draw = 0; draw < DRAW_COUNT; draw++)
185                 {
186                         delete drawCall[draw];
187                 }
188
189                 delete swiftConfig;
190         }
191
192         void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
193         {
                    // Forwards all arguments unchanged to the member blitter's clear()
194                 blitter.clear(pixel, format, dest, dRect, rgbaMask);
195         }
196
197         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
198         {
                    // Forwards all arguments unchanged to the member blitter's blit()
199                 blitter.blit(source, sRect, dest, dRect, filter);
200         }
201
202         void Renderer::blit3D(Surface *source, Surface *dest)
203         {
                    // Forwards both surfaces unchanged to the member blitter's blit3D()
204                 blitter.blit3D(source, dest);
205         }
206
207         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
208         {
209                 #ifndef NDEBUG
210                         if(count < minPrimitives || count > maxPrimitives)
211                         {
212                                 return;
213                         }
214                 #endif
215
216                 context->drawType = drawType;
217
218                 updateConfiguration();
219                 updateClipper();
220
221                 int ss = context->getSuperSampleCount();
222                 int ms = context->getMultiSampleCount();
223
224                 for(int q = 0; q < ss; q++)
225                 {
226                         int oldMultiSampleMask = context->multiSampleMask;
227                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
228
229                         if(!context->multiSampleMask)
230                         {
231                                 continue;
232                         }
233
234                         sync->lock(sw::PRIVATE);
235
236                         Routine *vertexRoutine;
237                         Routine *setupRoutine;
238                         Routine *pixelRoutine;
239
240                         if(update || oldMultiSampleMask != context->multiSampleMask)
241                         {
242                                 vertexState = VertexProcessor::update();
243                                 setupState = SetupProcessor::update();
244                                 pixelState = PixelProcessor::update();
245
246                                 vertexRoutine = VertexProcessor::routine(vertexState);
247                                 setupRoutine = SetupProcessor::routine(setupState);
248                                 pixelRoutine = PixelProcessor::routine(pixelState);
249                         }
250
251                         int batch = batchSize / ms;
252
253                         int (*setupPrimitives)(Renderer *renderer, int batch, int count);
254
255                         if(context->isDrawTriangle())
256                         {
257                                 switch(context->fillMode)
258                                 {
259                                 case FILL_SOLID:
260                                         setupPrimitives = setupSolidTriangles;
261                                         break;
262                                 case FILL_WIREFRAME:
263                                         setupPrimitives = setupWireframeTriangle;
264                                         batch = 1;
265                                         break;
266                                 case FILL_VERTEX:
267                                         setupPrimitives = setupVertexTriangle;
268                                         batch = 1;
269                                         break;
270                                 default: ASSERT(false);
271                                 }
272                         }
273                         else if(context->isDrawLine())
274                         {
275                                 setupPrimitives = setupLines;
276                         }
277                         else   // Point draw
278                         {
279                                 setupPrimitives = setupPoints;
280                         }
281
282                         DrawCall *draw = 0;
283
284                         do
285                         {
286                                 for(int i = 0; i < DRAW_COUNT; i++)
287                                 {
288                                         if(drawCall[i]->references == -1)
289                                         {
290                                                 draw = drawCall[i];
291                                                 drawList[nextDraw % DRAW_COUNT] = draw;
292
293                                                 break;
294                                         }
295                                 }
296
297                                 if(!draw)
298                                 {
299                                         resumeApp->wait();
300                                 }
301                         }
302                         while(!draw);
303
304                         DrawData *data = draw->data;
305
306                         if(queries.size() != 0)
307                         {
308                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
309                                 {
310                                         atomicIncrement(&(*query)->reference);
311                                 }
312
313                                 draw->queries = new std::list<Query*>(queries);
314                         }
315
316                         draw->drawType = drawType;
317                         draw->batchSize = batch;
318
319                         vertexRoutine->bind();
320                         setupRoutine->bind();
321                         pixelRoutine->bind();
322
323                         draw->vertexRoutine = vertexRoutine;
324                         draw->setupRoutine = setupRoutine;
325                         draw->pixelRoutine = pixelRoutine;
326                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
327                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
328                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
329                         draw->setupPrimitives = setupPrimitives;
330                         draw->setupState = setupState;
331
332                         for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
333                         {
334                                 draw->vertexStream[i] = context->input[i].resource;
335                                 data->input[i] = context->input[i].buffer;
336                                 data->stride[i] = context->input[i].stride;
337
338                                 if(draw->vertexStream[i])
339                                 {
340                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
341                                 }
342                         }
343
344                         if(context->indexBuffer)
345                         {
346                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
347                         }
348
349                         draw->indexBuffer = context->indexBuffer;
350
351                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
352                         {
353                                 draw->texture[sampler] = 0;
354                         }
355
356                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
357                         {
358                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
359                                 {
360                                         draw->texture[sampler] = context->texture[sampler];
361                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
362
363                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
364                                 }
365                         }
366
367                         if(context->pixelShader)
368                         {
369                                 if(draw->psDirtyConstF)
370                                 {
371                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
372                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
373                                         draw->psDirtyConstF = 0;
374                                 }
375
376                                 if(draw->psDirtyConstI)
377                                 {
378                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
379                                         draw->psDirtyConstI = 0;
380                                 }
381
382                                 if(draw->psDirtyConstB)
383                                 {
384                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
385                                         draw->psDirtyConstB = 0;
386                                 }
387                         }
388                         
389                         if(context->pixelShaderVersion() <= 0x0104)
390                         {
391                                 for(int stage = 0; stage < 8; stage++)
392                                 {
393                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
394                                         {
395                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
396                                         }
397                                         else break;
398                                 }
399                         }
400
401                         if(context->vertexShader)
402                         {
403                                 if(context->vertexShader->getVersion() >= 0x0300)
404                                 {
405                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
406                                         {
407                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
408                                                 {
409                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
410                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
411
412                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
413                                                 }
414                                         }
415                                 }
416
417                                 if(draw->vsDirtyConstF)
418                                 {
419                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
420                                         draw->vsDirtyConstF = 0;
421                                 }
422
423                                 if(draw->vsDirtyConstI)
424                                 {
425                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
426                                         draw->vsDirtyConstI = 0;
427                                 }
428
429                                 if(draw->vsDirtyConstB)
430                                 {
431                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
432                                         draw->vsDirtyConstB = 0;
433                                 }
434
435                                 if(context->vertexShader->instanceIdDeclared)
436                                 {
437                                         data->instanceID = context->instanceID;
438                                 }
439                         }
440                         else
441                         {
442                                 data->ff = ff;
443
444                                 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
445                                 draw->vsDirtyConstI = 16;
446                                 draw->vsDirtyConstB = 16;
447                         }
448
449                         if(pixelState.stencilActive)
450                         {
451                                 data->stencil[0] = stencil;
452                                 data->stencil[1] = stencilCCW;
453                         }
454
455                         if(pixelState.fogActive)
456                         {
457                                 data->fog = fog;
458                         }
459
460                         if(setupState.isDrawPoint)
461                         {
462                                 data->point = point;
463                         }
464
465                         data->lineWidth = context->lineWidth;
466
467                         data->factor = factor;
468
469                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
470                         {
471                                 float ref = context->alphaReference * (1.0f / 255.0f);
472                                 float margin = sw::min(ref, 1.0f - ref);
473
474                                 if(ms == 4)
475                                 {
476                                         data->a2c0 = replicate(ref - margin * 0.6f);
477                                         data->a2c1 = replicate(ref - margin * 0.2f);
478                                         data->a2c2 = replicate(ref + margin * 0.2f);
479                                         data->a2c3 = replicate(ref + margin * 0.6f);
480                                 }
481                                 else if(ms == 2)
482                                 {
483                                         data->a2c0 = replicate(ref - margin * 0.3f);
484                                         data->a2c1 = replicate(ref + margin * 0.3f);
485                                 }
486                                 else ASSERT(false);
487                         }
488
489                         if(pixelState.occlusionEnabled)
490                         {
491                                 for(int cluster = 0; cluster < clusterCount; cluster++)
492                                 {
493                                         data->occlusion[cluster] = 0;
494                                 }
495                         }
496
497                         #if PERF_PROFILE
498                                 for(int cluster = 0; cluster < clusterCount; cluster++)
499                                 {
500                                         for(int i = 0; i < PERF_TIMERS; i++)
501                                         {
502                                                 data->cycles[i][cluster] = 0;
503                                         }
504                                 }
505                         #endif
506
507                         // Viewport
508                         {
509                                 float W = 0.5f * viewport.width;
510                                 float H = 0.5f * viewport.height;
511                                 float X0 = viewport.x0 + W;
512                                 float Y0 = viewport.y0 + H;
513                                 float N = viewport.minZ;
514                                 float F = viewport.maxZ;
515                                 float Z = F - N;
516
517                                 if(context->isDrawTriangle(false))
518                                 {
519                                         N += depthBias;
520                                 }
521
522                                 if(complementaryDepthBuffer)
523                                 {
524                                         Z = -Z;
525                                         N = 1 - N;
526                                 }
527
528                                 static const float X[5][16] =   // Fragment offsets
529                                 {
530                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
531                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
532                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
533                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
534                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
535                                 };
536
537                                 static const float Y[5][16] =   // Fragment offsets
538                                 {
539                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
540                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
541                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
542                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
543                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
544                                 };
545
546                                 int s = sw::log2(ss);
547
548                                 data->Wx16 = replicate(W * 16);
549                                 data->Hx16 = replicate(H * 16);
550                                 data->X0x16 = replicate(X0 * 16 - 8);
551                                 data->Y0x16 = replicate(Y0 * 16 - 8);
552                                 data->XXXX = replicate(X[s][q] / W);
553                                 data->YYYY = replicate(Y[s][q] / H);
554                                 data->halfPixelX = replicate(0.5f / W);
555                                 data->halfPixelY = replicate(0.5f / H);
556                                 data->viewportHeight = abs(viewport.height);
557                                 data->slopeDepthBias = slopeDepthBias;
558                                 data->depthRange = Z;
559                                 data->depthNear = N;
560                                 draw->clipFlags = clipFlags;
561
562                                 if(clipFlags)
563                                 {
564                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
565                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
566                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
567                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
568                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
569                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
570                                 }
571                         }
572
573                         // Target
574                         {
575                                 for(int index = 0; index < RENDERTARGETS; index++)
576                                 {
577                                         draw->renderTarget[index] = context->renderTarget[index];
578
579                                         if(draw->renderTarget[index])
580                                         {
581                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
582                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
583                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
584                                         }
585                                 }
586
587                                 draw->depthStencil = context->depthStencil;
588
589                                 if(draw->depthStencil)
590                                 {
591                                         data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
592                                         data->depthPitchB = context->depthStencil->getInternalPitchB();
593                                         data->depthSliceB = context->depthStencil->getInternalSliceB();
594
595                                         data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
596                                         data->stencilPitchB = context->depthStencil->getStencilPitchB();
597                                         data->stencilSliceB = context->depthStencil->getStencilSliceB();
598                                 }
599                         }
600
601                         // Scissor
602                         {
603                                 data->scissorX0 = scissor.x0;
604                                 data->scissorX1 = scissor.x1;
605                                 data->scissorY0 = scissor.y0;
606                                 data->scissorY1 = scissor.y1;
607                         }
608
609                         draw->primitive = 0;
610                         draw->count = count;
611
612                         draw->references = (count + batch - 1) / batch;
613
614                         schedulerMutex.lock();
615                         nextDraw++;
616                         schedulerMutex.unlock();
617
618                         if(threadCount > 1)
619                         {
620                                 if(!threadsAwake)
621                                 {
622                                         suspend[0]->wait();
623
624                                         threadsAwake = 1;
625                                         task[0].type = Task::RESUME;
626
627                                         resume[0]->signal();
628                                 }
629                         }
630                         else   // Use main thread for draw execution
631                         {
632                                 threadsAwake = 1;
633                                 task[0].type = Task::RESUME;
634
635                                 taskLoop(0);
636                         }
637                 }
638         }
639
640         void Renderer::threadFunction(void *parameters)
641         {
                    // Thread entry point. 'parameters' must point to a Parameters struct naming
                    // the renderer and the thread index to run.
642                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
643                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
644
                    // Below IEEE precision, denormals are flushed/treated as zero
                    // (NOTE(review): presumably affects only the calling thread's FP state; confirm in CPUID)
645                 if(logPrecision < IEEE)
646                 {
647                         CPUID::setFlushToZero(true);
648                         CPUID::setDenormalsAreZero(true);
649                 }
650
651                 renderer->threadLoop(threadIndex);
652         }
653
654         void Renderer::threadLoop(int threadIndex)
655         {
                    // Runs tasks until exitThreads is set. After each taskLoop() returns the
                    // thread signals its suspend event and blocks on its resume event;
                    // Renderer::draw() waits on suspend[0] and signals resume[0] to wake thread 0.
656                 while(!exitThreads)
657                 {
658                         taskLoop(threadIndex);
659
660                         suspend[threadIndex]->signal();
661                         resume[threadIndex]->wait();
662                 }
663         }
664
665         void Renderer::taskLoop(int threadIndex)
666         {
                    // Fetch and run tasks for this thread until the scheduler marks it suspended.
                    // The condition is tested before the first scheduleTask() call, so callers
                    // set task[threadIndex].type to Task::RESUME before entering (see draw() and
                    // the wake-up code in scheduleTask()).
667                 while(task[threadIndex].type != Task::SUSPEND)
668                 {
                            // scheduleTask() either copies a queued task into task[threadIndex] or,
                            // when the queue is empty, sets its type to SUSPEND; executeTask()
                            // ignores SUSPEND, after which the loop condition ends the loop.
669                         scheduleTask(threadIndex);
670                         executeTask(threadIndex);
671                 }
672         }
673
674         void Renderer::findAvailableTasks()
675         {
                    // Appends every task that is currently runnable to the taskQueue ring buffer.
                    // The only call visible in this file is from scheduleTask(), which holds
                    // schedulerMutex around it.
                    //
                    // Pixel tasks are queued first, then primitive tasks. A cluster has at most one
                    // pixel task outstanding (guarded by 'executing') and a unit at most one
                    // primitive task outstanding (guarded by references == -1).
                    // NOTE(review): nothing here compares qSize against the ring size of 32;
                    // presumably clusterCount + unitCount can never exceed the taskQueue
                    // capacity - confirm against the declarations in Renderer.hpp.

676                 // Find pixel tasks
677                 for(int cluster = 0; cluster < clusterCount; cluster++)
678                 {
679                         if(!pixelProgress[cluster].executing)
680                         {
                                    // Look for the batch this cluster must render next: same draw
                                    // call, and starting exactly where the cluster left off.
681                                 for(int unit = 0; unit < unitCount; unit++)
682                                 {
683                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
684                                         {
685                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
686                                                 {
687                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
688                                                         {
689                                                                 Task &task = taskQueue[qHead];
690                                                                 task.type = Task::PIXELS;
691                                                                 task.primitiveUnit = unit;
692                                                                 task.pixelCluster = cluster;
693
                                                                    // Cleared again at the end of finishRendering().
694                                                                 pixelProgress[cluster].executing = true;
695
696                                                                 // Commit to the task queue
697                                                                 qHead = (qHead + 1) % 32;
698                                                                 qSize++;
699
                                                                    // One pixel task per cluster per call.
700                                                                 break;
701                                                         }
702                                                 }
703                                         }
704                                 }
705                         }
706                 }
707         
708                 // Find primitive tasks
709                 if(currentDraw == nextDraw)
710                 {
711                         return;   // No more primitives to process
712                 }
713
714                 for(int unit = 0; unit < unitCount; unit++)
715                 {
716                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
717
                            // All batches of the current draw call have been handed out; move on
                            // to the next queued draw call, if there is one. draw->primitive is
                            // advanced by a full batch each time (below), so it can overshoot
                            // draw->count, hence the >= comparison.
718                         if(draw->primitive >= draw->count)
719                         {
720                                 currentDraw++;
721
722                                 if(currentDraw == nextDraw)
723                                 {
724                                         return;   // No more primitives to process
725                                 }
726
727                                 draw = drawList[currentDraw % DRAW_COUNT];
728                         }
729
730                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
731                         {
732                                 int primitive = draw->primitive;
733                                 int count = draw->count;
734                                 int batch = draw->batchSize;
735
                                    // Assign the next batch to this unit; the last batch of a draw
                                    // call is clamped to the primitives that remain.
736                                 primitiveProgress[unit].drawCall = currentDraw;
737                                 primitiveProgress[unit].firstPrimitive = primitive;
738                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
739
740                                 draw->primitive += batch;
741
742                                 Task &task = taskQueue[qHead];
743                                 task.type = Task::PRIMITIVES;
744                                 task.primitiveUnit = unit;
745
                                    // -1 keeps the unit from being picked by either loop above until
                                    // executeTask() replaces it with clusterCount after setup.
746                                 primitiveProgress[unit].references = -1;
747
748                                 // Commit to the task queue
749                                 qHead = (qHead + 1) % 32;
750                                 qSize++;
751                         }
752                 }
753         }
754
755         void Renderer::scheduleTask(int threadIndex)
756         {
                    // Picks the next task for the calling thread and stores it in
                    // task[threadIndex]; if nothing is runnable the thread is marked
                    // Task::SUSPEND instead. The whole body runs under schedulerMutex.
757                 schedulerMutex.lock();
758
                    // Refill the queue when it holds fewer tasks than the number of threads that
                    // are not awake (threadCount - threadsAwake) plus one for the caller.
759                 if((int)qSize < threadCount - threadsAwake + 1)
760                 {
761                         findAvailableTasks();
762                 }
763
764                 if(qSize != 0)
765                 {
                            // Take the oldest entry: qHead is the next write slot, so the oldest
                            // entry sits qSize slots behind it in the ring.
                            // NOTE(review): this relies on (qHead - qSize) % 32 being non-negative
                            // when qHead < qSize; presumably both are unsigned (the (int) cast
                            // above hints at that) - confirm in Renderer.hpp.
766                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
767                         qSize--;
768
769                         if(threadsAwake != threadCount)
770                         {
                                    // Additional threads to wake: tasks still queued, minus the
                                    // threads already awake, plus one.
771                                 int wakeup = qSize - threadsAwake + 1;
772
773                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
774                                 {
775                                         if(task[i].type == Task::SUSPEND)
776                                         {
                                                    // Wait until thread i has really left taskLoop()
                                                    // (threadLoop() signals suspend[i] at that point),
                                                    // then mark it resumed and release it. Note that
                                                    // this wait happens with schedulerMutex held.
777                                                 suspend[i]->wait();
778                                                 task[i].type = Task::RESUME;
779                                                 resume[i]->signal();
780
781                                                 threadsAwake++;
782                                                 wakeup--;
783                                         }
784                                 }
785                         }
786                 }
787                 else
788                 {
                            // Nothing to run: taskLoop() exits once it sees SUSPEND.
789                         task[threadIndex].type = Task::SUSPEND;
790
791                         threadsAwake--;
792                 }
793
794                 schedulerMutex.unlock();
795         }
796
797         void Renderer::executeTask(int threadIndex)
798         {
                    // Runs the task previously stored in task[threadIndex] by scheduleTask().
                    // With PERF_HUD enabled, elapsed ticks are accumulated per thread into
                    // vertexTime, setupTime and pixelTime.
799                 #if PERF_HUD
800                         int64_t startTick = Timer::ticks();
801                 #endif
802
803                 switch(task[threadIndex].type)
804                 {
805                 case Task::PRIMITIVES:
806                         {
                                    // Vertex processing followed by primitive setup for the batch
                                    // that findAvailableTasks() assigned to this unit.
807                                 int unit = task[threadIndex].primitiveUnit;
808                                 
809                                 int input = primitiveProgress[unit].firstPrimitive;
810                                 int count = primitiveProgress[unit].primitiveCount;
811                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
812                                 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
813
                                    // draw->count is passed as the 'loop' argument, which the line
                                    // loop cases use as the modulus for wrapping indices.
814                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
815
816                                 #if PERF_HUD
817                                         int64_t time = Timer::ticks();
818                                         vertexTime[threadIndex] += time - startTick;
819                                         startTick = time;
820                                 #endif
821
                                    // The unit index is passed as the routine's 'batch' argument.
                                    // NOTE(review): the return value is presumably the number of
                                    // primitives that remain visible after setup - confirm.
822                                 int visible = setupPrimitives(this, unit, count);
823
                                    // Publish the batch: findAvailableTasks() treats references > 0
                                    // as "contains processed primitives", and finishRendering()
                                    // decrements it once per completed pixel task.
824                                 primitiveProgress[unit].visible = visible;
825                                 primitiveProgress[unit].references = clusterCount;
826
827                                 #if PERF_HUD
828                                         setupTime[threadIndex] += Timer::ticks() - startTick;
829                                 #endif
830                         }
831                         break;
832                 case Task::PIXELS:
833                         {
834                                 int unit = task[threadIndex].primitiveUnit;
835                                 int visible = primitiveProgress[unit].visible;
836
                                    // Skip the pixel routine when setup reported nothing to draw.
837                                 if(visible > 0)
838                                 {
839                                         int cluster = task[threadIndex].pixelCluster;
840                                         Primitive *primitive = primitiveBatch[unit];
841                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
842                                         DrawData *data = draw->data;
843                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
844
845                                         pixelRoutine(primitive, visible, cluster, data);
846                                 }
847
                                    // Always called, so the cluster's progress and the reference
                                    // counts advance even for batches with no visible primitives.
848                                 finishRendering(task[threadIndex]);
849
850                                 #if PERF_HUD
851                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
852                                 #endif
853                         }
854                         break;
                    // RESUME and SUSPEND carry no work; they only steer taskLoop().
855                 case Task::RESUME:
856                         break;
857                 case Task::SUSPEND:
858                         break;
859                 default:
860                         ASSERT(false);
861                 }
862         }
863
864         void Renderer::synchronize()
865         {
                    // Acquires 'sync' with sw::PUBLIC access and releases it again right away.
                    // NOTE(review): presumably the lock cannot be obtained until in-flight draw
                    // calls have released 'sync' (finishRendering() calls sync->unlock() when a
                    // draw call completes), making this a wait-for-idle - confirm against the
                    // matching lock in draw() and the Resource locking rules.
866                 sync->lock(sw::PUBLIC);
867                 sync->unlock();
868         }
869
870         void Renderer::finishRendering(Task &pixelTask)
871         {
                    // Bookkeeping after a pixel task: advances the cluster's progress, drops one
                    // reference on the primitive batch and, when the last batch of a draw call is
                    // done, releases everything the draw call was holding.
                    // Called from executeTask(), i.e. after scheduleTask() has already released
                    // schedulerMutex; the shared counters are updated with atomicDecrement.
872                 int unit = pixelTask.primitiveUnit;
873                 int cluster = pixelTask.pixelCluster;
874
875                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
876                 DrawData &data = *draw.data;
877                 int primitive = primitiveProgress[unit].firstPrimitive;
878                 int count = primitiveProgress[unit].primitiveCount;
879
                    // This cluster has now rendered everything up to the end of this batch.
880                 pixelProgress[cluster].processedPrimitives = primitive + count;
881
                    // End of the draw call for this cluster: start counting from zero in the
                    // next draw call.
882                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
883                 {
884                         pixelProgress[cluster].drawCall++;
885                         pixelProgress[cluster].processedPrimitives = 0;
886                 }
887
                    // executeTask() set references to clusterCount after setup.
                    // NOTE(review): the == 0 tests assume atomicDecrement returns the value
                    // after the decrement - confirm.
888                 int ref = atomicDecrement(&primitiveProgress[unit].references);
889
                    // Reaching zero means every cluster is done with this batch; the unit can
                    // then be handed a new batch by findAvailableTasks().
890                 if(ref == 0)
891                 {
                            // draw() initialises draw.references to the number of batches.
892                         ref = atomicDecrement(&draw.references);
893
                            // Last batch of the draw call: finalize it.
894                         if(ref == 0)
895                         {
896                                 #if PERF_PROFILE
897                                         for(int cluster = 0; cluster < clusterCount; cluster++)
898                                         {
899                                                 for(int i = 0; i < PERF_TIMERS; i++)
900                                                 {
901                                                         profiler.cycles[i] += data.cycles[i][cluster];
902                                                 }
903                                         }
904                                 #endif
905
                                    // Add the per-cluster occlusion counts to every query attached
                                    // to this draw call, drop the draw call's reference on each
                                    // query, then free the list itself.
906                                 if(draw.queries)
907                                 {
908                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
909                                         {
910                                                 Query *query = *q;
911
912                                                 for(int cluster = 0; cluster < clusterCount; cluster++)
913                                                 {
914                                                         atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
915                                                 }
916
917                                                 atomicDecrement(&query->reference);
918                                         }
919
920                                         delete draw.queries;
921                                         draw.queries = 0;
922                                 }
923
                                    // Unlock every surface and buffer the draw call references.
924                                 for(int i = 0; i < RENDERTARGETS; i++)
925                                 {
926                                         if(draw.renderTarget[i])
927                                         {
928                                                 draw.renderTarget[i]->unlockInternal();
929                                         }
930                                 }
931
932                                 if(draw.depthStencil)
933                                 {
934                                         draw.depthStencil->unlockInternal();
935                                         draw.depthStencil->unlockStencil();
936                                 }
937
938                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
939                                 {
940                                         if(draw.texture[i])
941                                         {
942                                                 draw.texture[i]->unlock();
943                                         }
944                                 }
945
946                                 for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
947                                 {
948                                         if(draw.vertexStream[i])
949                                         {
950                                                 draw.vertexStream[i]->unlock();
951                                         }
952                                 }
953
954                                 if(draw.indexBuffer)
955                                 {
956                                         draw.indexBuffer->unlock();
957                                 }
958
959                                 draw.vertexRoutine->unbind();
960                                 draw.setupRoutine->unbind();
961                                 draw.pixelRoutine->unbind();
962
963                                 sync->unlock();
964
                                    // NOTE(review): -1 presumably marks this DrawCall slot as free,
                                    // and resumeApp presumably wakes an application thread waiting
                                    // in draw() for a slot - both happen outside this view, confirm.
965                                 draw.references = -1;
966                                 resumeApp->signal();
967                         }
968                 }
969
                    // Allow findAvailableTasks() to schedule this cluster again.
970                 pixelProgress[cluster].executing = false;
971         }
972
973         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
974         {
975                 Triangle *triangle = triangleBatch[unit];
976                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
977                 DrawData *data = draw->data;
978                 VertexTask *task = vertexTask[thread];
979
980                 const void *indices = data->indices;
981                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
982
983                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
984                 {
985                         task->vertexCache.clear();
986                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
987                 }
988
989                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
990
991                 switch(draw->drawType)
992                 {
993                 case DRAW_POINTLIST:
994                         {
995                                 unsigned int index = start;
996
997                                 for(unsigned int i = 0; i < triangleCount; i++)
998                                 {
999                                         batch[i][0] = index;
1000                                         batch[i][1] = index;
1001                                         batch[i][2] = index;
1002
1003                                         index += 1;
1004                                 }
1005                         }
1006                         break;
1007                 case DRAW_LINELIST:
1008                         {
1009                                 unsigned int index = 2 * start;
1010
1011                                 for(unsigned int i = 0; i < triangleCount; i++)
1012                                 {
1013                                         batch[i][0] = index + 0;
1014                                         batch[i][1] = index + 1;
1015                                         batch[i][2] = index + 1;
1016
1017                                         index += 2;
1018                                 }
1019                         }
1020                         break;
1021                 case DRAW_LINESTRIP:
1022                         {
1023                                 unsigned int index = start;
1024
1025                                 for(unsigned int i = 0; i < triangleCount; i++)
1026                                 {
1027                                         batch[i][0] = index + 0;
1028                                         batch[i][1] = index + 1;
1029                                         batch[i][2] = index + 1;
1030
1031                                         index += 1;
1032                                 }
1033                         }
1034                         break;
1035                 case DRAW_LINELOOP:
1036                         {
1037                                 unsigned int index = start;
1038
1039                                 for(unsigned int i = 0; i < triangleCount; i++)
1040                                 {
1041                                         batch[i][0] = (index + 0) % loop;
1042                                         batch[i][1] = (index + 1) % loop;
1043                                         batch[i][2] = (index + 1) % loop;
1044
1045                                         index += 1;
1046                                 }
1047                         }
1048                         break;
1049                 case DRAW_TRIANGLELIST:
1050                         {
1051                                 unsigned int index = 3 * start;
1052
1053                                 for(unsigned int i = 0; i < triangleCount; i++)
1054                                 {
1055                                         batch[i][0] = index + 0;
1056                                         batch[i][1] = index + 1;
1057                                         batch[i][2] = index + 2;
1058
1059                                         index += 3;
1060                                 }
1061                         }
1062                         break;
1063                 case DRAW_TRIANGLESTRIP:
1064                         {
1065                                 unsigned int index = start;
1066
1067                                 for(unsigned int i = 0; i < triangleCount; i++)
1068                                 {
1069                                         batch[i][0] = index + 0;
1070                                         batch[i][1] = index + (index & 1) + 1;
1071                                         batch[i][2] = index + (~index & 1) + 1;
1072
1073                                         index += 1;
1074                                 }
1075                         }
1076                         break;
1077                 case DRAW_TRIANGLEFAN:
1078                         {
1079                                 unsigned int index = start;
1080
1081                                 for(unsigned int i = 0; i < triangleCount; i++)
1082                                 {
1083                                         batch[i][0] = index + 1;
1084                                         batch[i][1] = index + 2;
1085                                         batch[i][2] = 0;
1086
1087                                         index += 1;
1088                                 }
1089                         }
1090                         break;
1091                 case DRAW_INDEXEDPOINTLIST8:
1092                         {
1093                                 const unsigned char *index = (const unsigned char*)indices + start;
1094
1095                                 for(unsigned int i = 0; i < triangleCount; i++)
1096                                 {
1097                                         batch[i][0] = *index;
1098                                         batch[i][1] = *index;
1099                                         batch[i][2] = *index;
1100
1101                                         index += 1;
1102                                 }
1103                         }
1104                         break;
1105                 case DRAW_INDEXEDPOINTLIST16:
1106                         {
1107                                 const unsigned short *index = (const unsigned short*)indices + start;
1108
1109                                 for(unsigned int i = 0; i < triangleCount; i++)
1110                                 {
1111                                         batch[i][0] = *index;
1112                                         batch[i][1] = *index;
1113                                         batch[i][2] = *index;
1114
1115                                         index += 1;
1116                                 }
1117                         }
1118                         break;
1119                 case DRAW_INDEXEDPOINTLIST32:
1120                         {
1121                                 const unsigned int *index = (const unsigned int*)indices + start;
1122
1123                                 for(unsigned int i = 0; i < triangleCount; i++)
1124                                 {
1125                                         batch[i][0] = *index;
1126                                         batch[i][1] = *index;
1127                                         batch[i][2] = *index;
1128
1129                                         index += 1;
1130                                 }
1131                         }
1132                         break;
1133                 case DRAW_INDEXEDLINELIST8:
1134                         {
1135                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1136
1137                                 for(unsigned int i = 0; i < triangleCount; i++)
1138                                 {
1139                                         batch[i][0] = index[0];
1140                                         batch[i][1] = index[1];
1141                                         batch[i][2] = index[1];
1142
1143                                         index += 2;
1144                                 }
1145                         }
1146                         break;
1147                 case DRAW_INDEXEDLINELIST16:
1148                         {
1149                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1150
1151                                 for(unsigned int i = 0; i < triangleCount; i++)
1152                                 {
1153                                         batch[i][0] = index[0];
1154                                         batch[i][1] = index[1];
1155                                         batch[i][2] = index[1];
1156
1157                                         index += 2;
1158                                 }
1159                         }
1160                         break;
1161                 case DRAW_INDEXEDLINELIST32:
1162                         {
1163                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1164
1165                                 for(unsigned int i = 0; i < triangleCount; i++)
1166                                 {
1167                                         batch[i][0] = index[0];
1168                                         batch[i][1] = index[1];
1169                                         batch[i][2] = index[1];
1170
1171                                         index += 2;
1172                                 }
1173                         }
1174                         break;
1175                 case DRAW_INDEXEDLINESTRIP8:
1176                         {
1177                                 const unsigned char *index = (const unsigned char*)indices + start;
1178
1179                                 for(unsigned int i = 0; i < triangleCount; i++)
1180                                 {
1181                                         batch[i][0] = index[0];
1182                                         batch[i][1] = index[1];
1183                                         batch[i][2] = index[1];
1184
1185                                         index += 1;
1186                                 }
1187                         }
1188                         break;
1189                 case DRAW_INDEXEDLINESTRIP16:
1190                         {
1191                                 const unsigned short *index = (const unsigned short*)indices + start;
1192
1193                                 for(unsigned int i = 0; i < triangleCount; i++)
1194                                 {
1195                                         batch[i][0] = index[0];
1196                                         batch[i][1] = index[1];
1197                                         batch[i][2] = index[1];
1198
1199                                         index += 1;
1200                                 }
1201                         }
1202                         break;
1203                 case DRAW_INDEXEDLINESTRIP32:
1204                         {
1205                                 const unsigned int *index = (const unsigned int*)indices + start;
1206
1207                                 for(unsigned int i = 0; i < triangleCount; i++)
1208                                 {
1209                                         batch[i][0] = index[0];
1210                                         batch[i][1] = index[1];
1211                                         batch[i][2] = index[1];
1212
1213                                         index += 1;
1214                                 }
1215                         }
1216                         break;
1217                 case DRAW_INDEXEDLINELOOP8:
1218                         {
1219                                 const unsigned char *index = (const unsigned char*)indices;
1220
1221                                 for(unsigned int i = 0; i < triangleCount; i++)
1222                                 {
1223                                         batch[i][0] = index[(start + i + 0) % loop];
1224                                         batch[i][1] = index[(start + i + 1) % loop];
1225                                         batch[i][2] = index[(start + i + 1) % loop];
1226                                 }
1227                         }
1228                         break;
1229                 case DRAW_INDEXEDLINELOOP16:
1230                         {
1231                                 const unsigned short *index = (const unsigned short*)indices;
1232
1233                                 for(unsigned int i = 0; i < triangleCount; i++)
1234                                 {
1235                                         batch[i][0] = index[(start + i + 0) % loop];
1236                                         batch[i][1] = index[(start + i + 1) % loop];
1237                                         batch[i][2] = index[(start + i + 1) % loop];
1238                                 }
1239                         }
1240                         break;
1241                 case DRAW_INDEXEDLINELOOP32:
1242                         {
1243                                 const unsigned int *index = (const unsigned int*)indices;
1244
1245                                 for(unsigned int i = 0; i < triangleCount; i++)
1246                                 {
1247                                         batch[i][0] = index[(start + i + 0) % loop];
1248                                         batch[i][1] = index[(start + i + 1) % loop];
1249                                         batch[i][2] = index[(start + i + 1) % loop];
1250                                 }
1251                         }
1252                         break;
1253                 case DRAW_INDEXEDTRIANGLELIST8:
1254                         {
1255                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1256
1257                                 for(unsigned int i = 0; i < triangleCount; i++)
1258                                 {
1259                                         batch[i][0] = index[0];
1260                                         batch[i][1] = index[1];
1261                                         batch[i][2] = index[2];
1262
1263                                         index += 3;
1264                                 }
1265                         }
1266                         break;
1267                 case DRAW_INDEXEDTRIANGLELIST16:
1268                         {
1269                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1270
1271                                 for(unsigned int i = 0; i < triangleCount; i++)
1272                                 {
1273                                         batch[i][0] = index[0];
1274                                         batch[i][1] = index[1];
1275                                         batch[i][2] = index[2];
1276
1277                                         index += 3;
1278                                 }
1279                         }
1280                         break;
1281                 case DRAW_INDEXEDTRIANGLELIST32:
1282                         {
1283                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1284
1285                                 for(unsigned int i = 0; i < triangleCount; i++)
1286                                 {
1287                                         batch[i][0] = index[0];
1288                                         batch[i][1] = index[1];
1289                                         batch[i][2] = index[2];
1290
1291                                         index += 3;
1292                                 }
1293                         }
1294                         break;
1295                 case DRAW_INDEXEDTRIANGLESTRIP8:
1296                         {
1297                                 const unsigned char *index = (const unsigned char*)indices + start;
1298
1299                                 for(unsigned int i = 0; i < triangleCount; i++)
1300                                 {
1301                                         batch[i][0] = index[0];
1302                                         batch[i][1] = index[((start + i) & 1) + 1];
1303                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1304
1305                                         index += 1;
1306                                 }
1307                         }
1308                         break;
1309                 case DRAW_INDEXEDTRIANGLESTRIP16:
1310                         {
1311                                 const unsigned short *index = (const unsigned short*)indices + start;
1312
1313                                 for(unsigned int i = 0; i < triangleCount; i++)
1314                                 {
1315                                         batch[i][0] = index[0];
1316                                         batch[i][1] = index[((start + i) & 1) + 1];
1317                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1318
1319                                         index += 1;
1320                                 }
1321                         }
1322                         break;
1323                 case DRAW_INDEXEDTRIANGLESTRIP32:
1324                         {
1325                                 const unsigned int *index = (const unsigned int*)indices + start;
1326
1327                                 for(unsigned int i = 0; i < triangleCount; i++)
1328                                 {
1329                                         batch[i][0] = index[0];
1330                                         batch[i][1] = index[((start + i) & 1) + 1];
1331                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1332
1333                                         index += 1;
1334                                 }
1335                         }
1336                         break;
1337                 case DRAW_INDEXEDTRIANGLEFAN8:
1338                         {
1339                                 const unsigned char *index = (const unsigned char*)indices;
1340
1341                                 for(unsigned int i = 0; i < triangleCount; i++)
1342                                 {
1343                                         batch[i][0] = index[start + i + 1];
1344                                         batch[i][1] = index[start + i + 2];
1345                                         batch[i][2] = index[0];
1346                                 }
1347                         }
1348                         break;
1349                 case DRAW_INDEXEDTRIANGLEFAN16:
1350                         {
1351                                 const unsigned short *index = (const unsigned short*)indices;
1352
1353                                 for(unsigned int i = 0; i < triangleCount; i++)
1354                                 {
1355                                         batch[i][0] = index[start + i + 1];
1356                                         batch[i][1] = index[start + i + 2];
1357                                         batch[i][2] = index[0];
1358                                 }
1359                         }
1360                         break;
1361                 case DRAW_INDEXEDTRIANGLEFAN32:
1362                         {
1363                                 const unsigned int *index = (const unsigned int*)indices;
1364
1365                                 for(unsigned int i = 0; i < triangleCount; i++)
1366                                 {
1367                                         batch[i][0] = index[start + i + 1];
1368                                         batch[i][1] = index[start + i + 2];
1369                                         batch[i][2] = index[0];
1370                                 }
1371                         }
1372                         break;
1373         case DRAW_QUADLIST:
1374                         {
1375                                 unsigned int index = 4 * start / 2;
1376
1377                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1378                                 {
1379                                         batch[i+0][0] = index + 0;
1380                                         batch[i+0][1] = index + 1;
1381                                         batch[i+0][2] = index + 2;
1382
1383                     batch[i+1][0] = index + 0;
1384                                         batch[i+1][1] = index + 2;
1385                                         batch[i+1][2] = index + 3;
1386
1387                                         index += 4;
1388                                 }
1389                         }
1390                         break;
1391                 default:
1392                         ASSERT(false);
1393                 }
1394
1395                 task->vertexCount = triangleCount * 3;
1396                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1397         }
1398
1399         int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1400         {
1401                 Triangle *triangle = renderer->triangleBatch[unit];
1402                 Primitive *primitive = renderer->primitiveBatch[unit];
1403
1404                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1405                 SetupProcessor::State &state = draw.setupState;
1406                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1407
1408                 int ms = state.multiSample;
1409                 int pos = state.positionRegister;
1410                 const DrawData *data = draw.data;
1411                 int visible = 0;
1412
1413                 for(int i = 0; i < count; i++, triangle++)
1414                 {
1415                         Vertex &v0 = triangle->v0;
1416                         Vertex &v1 = triangle->v1;
1417                         Vertex &v2 = triangle->v2;
1418
1419                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1420                         {
1421                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1422
1423                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1424
1425                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1426                                 {
1427                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1428                                         {
1429                                                 continue;
1430                                         }
1431                                 }
1432
1433                                 if(setupRoutine(primitive, triangle, &polygon, data))
1434                                 {
1435                                         primitive += ms;
1436                                         visible++;
1437                                 }
1438                         }
1439                 }
1440
1441                 return visible;
1442         }
1443
1444         int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1445         {
1446                 Triangle *triangle = renderer->triangleBatch[unit];
1447                 Primitive *primitive = renderer->primitiveBatch[unit];
1448                 int visible = 0;
1449
1450                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1451                 SetupProcessor::State &state = draw.setupState;
1452                 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1453
1454                 const Vertex &v0 = triangle[0].v0;
1455                 const Vertex &v1 = triangle[0].v1;
1456                 const Vertex &v2 = triangle[0].v2;
1457
1458                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1459
1460                 if(state.cullMode == CULL_CLOCKWISE)
1461                 {
1462                         if(d >= 0) return 0;
1463                 }
1464                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1465                 {
1466                         if(d <= 0) return 0;
1467                 }
1468
1469                 // Copy attributes
1470                 triangle[1].v0 = v1;
1471                 triangle[1].v1 = v2;
1472                 triangle[2].v0 = v2;
1473                 triangle[2].v1 = v0;
1474
1475                 if(state.color[0][0].flat)   // FIXME
1476                 {
1477                         for(int i = 0; i < 2; i++)
1478                         {
1479                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1480                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1481                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1482                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1483                         }
1484                 }
1485
1486                 for(int i = 0; i < 3; i++)
1487                 {
1488                         if(setupLine(renderer, *primitive, *triangle, draw))
1489                         {
1490                                 primitive->area = 0.5f * d;
1491
1492                                 primitive++;
1493                                 visible++;
1494                         }
1495
1496                         triangle++;
1497                 }
1498
1499                 return visible;
1500         }
1501         
1502         int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1503         {
1504                 Triangle *triangle = renderer->triangleBatch[unit];
1505                 Primitive *primitive = renderer->primitiveBatch[unit];
1506                 int visible = 0;
1507
1508                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1509                 SetupProcessor::State &state = draw.setupState;
1510
1511                 const Vertex &v0 = triangle[0].v0;
1512                 const Vertex &v1 = triangle[0].v1;
1513                 const Vertex &v2 = triangle[0].v2;
1514
1515                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1516
1517                 if(state.cullMode == CULL_CLOCKWISE)
1518                 {
1519                         if(d >= 0) return 0;
1520                 }
1521                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1522                 {
1523                         if(d <= 0) return 0;
1524                 }
1525
1526                 // Copy attributes
1527                 triangle[1].v0 = v1;
1528                 triangle[2].v0 = v2;
1529
1530                 for(int i = 0; i < 3; i++)
1531                 {
1532                         if(setupPoint(renderer, *primitive, *triangle, draw))
1533                         {
1534                                 primitive->area = 0.5f * d;
1535
1536                                 primitive++;
1537                                 visible++;
1538                         }
1539
1540                         triangle++;
1541                 }
1542
1543                 return visible;
1544         }
1545
1546         int Renderer::setupLines(Renderer *renderer, int unit, int count)
1547         {
1548                 Triangle *triangle = renderer->triangleBatch[unit];
1549                 Primitive *primitive = renderer->primitiveBatch[unit];
1550                 int visible = 0;
1551
1552                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1553                 SetupProcessor::State &state = draw.setupState;
1554
1555                 int ms = state.multiSample;
1556
1557                 for(int i = 0; i < count; i++)
1558                 {
1559                         if(setupLine(renderer, *primitive, *triangle, draw))
1560                         {
1561                                 primitive += ms;
1562                                 visible++;
1563                         }
1564
1565                         triangle++;
1566                 }
1567
1568                 return visible;
1569         }
1570
1571         int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1572         {
1573                 Triangle *triangle = renderer->triangleBatch[unit];
1574                 Primitive *primitive = renderer->primitiveBatch[unit];
1575                 int visible = 0;
1576
1577                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1578                 SetupProcessor::State &state = draw.setupState;
1579
1580                 int ms = state.multiSample;
1581
1582                 for(int i = 0; i < count; i++)
1583                 {
1584                         if(setupPoint(renderer, *primitive, *triangle, draw))
1585                         {
1586                                 primitive += ms;
1587                                 visible++;
1588                         }
1589
1590                         triangle++;
1591                 }
1592
1593                 return visible;
1594         }
1595
1596         bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1597         {
1598                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1599                 const SetupProcessor::State &state = draw.setupState;
1600                 const DrawData &data = *draw.data;
1601
1602                 float lineWidth = data.lineWidth;
1603
1604                 Vertex &v0 = triangle.v0;
1605                 Vertex &v1 = triangle.v1;
1606
1607                 int pos = state.positionRegister;
1608
1609                 const float4 &P0 = v0.v[pos];
1610                 const float4 &P1 = v1.v[pos];
1611
1612                 if(P0.w <= 0 && P1.w <= 0)
1613                 {
1614                         return false;
1615                 }
1616
1617                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1618                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1619
1620                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1621                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1622
1623                 if(dx == 0 && dy == 0)
1624                 {
1625                         return false;
1626                 }
1627
1628                 if(false)   // Rectangle
1629                 {
1630                         float4 P[4];
1631                         int C[4];
1632
1633                         P[0] = P0;
1634                         P[1] = P1;
1635                         P[2] = P1;
1636                         P[3] = P0;
1637
1638                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1639
1640                         dx *= scale;
1641                         dy *= scale;
1642
1643                         float dx0w = dx * P0.w / W;
1644                         float dy0h = dy * P0.w / H;
1645                         float dx0h = dx * P0.w / H;
1646                         float dy0w = dy * P0.w / W;
1647
1648                         float dx1w = dx * P1.w / W;
1649                         float dy1h = dy * P1.w / H;
1650                         float dx1h = dx * P1.w / H;
1651                         float dy1w = dy * P1.w / W;
1652
1653                         P[0].x += -dy0w + -dx0w;
1654                         P[0].y += -dx0h + +dy0h;
1655                         C[0] = computeClipFlags(P[0], data);
1656
1657                         P[1].x += -dy1w + +dx1w;
1658                         P[1].y += -dx1h + +dy1h;
1659                         C[1] = computeClipFlags(P[1], data);
1660
1661                         P[2].x += +dy1w + +dx1w;
1662                         P[2].y += +dx1h + -dy1h;
1663                         C[2] = computeClipFlags(P[2], data);
1664
1665                         P[3].x += +dy0w + -dx0w;
1666                         P[3].y += +dx0h + +dy0h;
1667                         C[3] = computeClipFlags(P[3], data);
1668
1669                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1670                         {
1671                                 Polygon polygon(P, 4);
1672
1673                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1674
1675                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1676                                 {
1677                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1678                                         {
1679                                                 return false;
1680                                         }
1681                                 }
1682
1683                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1684                         }
1685                 }
1686                 else   // Diamond test convention
1687                 {
1688                         float4 P[8];
1689                         int C[8];
1690
1691                         P[0] = P0;
1692                         P[1] = P0;
1693                         P[2] = P0;
1694                         P[3] = P0;
1695                         P[4] = P1;
1696                         P[5] = P1;
1697                         P[6] = P1;
1698                         P[7] = P1;
1699
1700                         float dx0 = lineWidth * 0.5f * P0.w / W;
1701                         float dy0 = lineWidth * 0.5f * P0.w / H;
1702
1703                         float dx1 = lineWidth * 0.5f * P1.w / W;
1704                         float dy1 = lineWidth * 0.5f * P1.w / H;
1705
1706                         P[0].x += -dx0;
1707                         C[0] = computeClipFlags(P[0], data);
1708
1709                         P[1].y += +dy0;
1710                         C[1] = computeClipFlags(P[1], data);
1711
1712                         P[2].x += +dx0;
1713                         C[2] = computeClipFlags(P[2], data);
1714
1715                         P[3].y += -dy0;
1716                         C[3] = computeClipFlags(P[3], data);
1717
1718                         P[4].x += -dx1;
1719                         C[4] = computeClipFlags(P[4], data);
1720
1721                         P[5].y += +dy1;
1722                         C[5] = computeClipFlags(P[5], data);
1723
1724                         P[6].x += +dx1;
1725                         C[6] = computeClipFlags(P[6], data);
1726
1727                         P[7].y += -dy1;
1728                         C[7] = computeClipFlags(P[7], data);
1729
1730                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1731                         {
1732                                 float4 L[6];
1733
1734                                 if(dx > -dy)
1735                                 {
1736                                         if(dx > dy)   // Right
1737                                         {
1738                                                 L[0] = P[0];
1739                                                 L[1] = P[1];
1740                                                 L[2] = P[5];
1741                                                 L[3] = P[6];
1742                                                 L[4] = P[7];
1743                                                 L[5] = P[3];
1744                                         }
1745                                         else   // Down
1746                                         {
1747                                                 L[0] = P[0];
1748                                                 L[1] = P[4];
1749                                                 L[2] = P[5];
1750                                                 L[3] = P[6];
1751                                                 L[4] = P[2];
1752                                                 L[5] = P[3];
1753                                         }
1754                                 }
1755                                 else
1756                                 {
1757                                         if(dx > dy)   // Up
1758                                         {
1759                                                 L[0] = P[0];
1760                                                 L[1] = P[1];
1761                                                 L[2] = P[2];
1762                                                 L[3] = P[6];
1763                                                 L[4] = P[7];
1764                                                 L[5] = P[4];
1765                                         }
1766                                         else   // Left
1767                                         {
1768                                                 L[0] = P[1];
1769                                                 L[1] = P[2];
1770                                                 L[2] = P[3];
1771                                                 L[3] = P[7];
1772                                                 L[4] = P[4];
1773                                                 L[5] = P[5];
1774                                         }
1775                                 }
1776
1777                                 Polygon polygon(L, 6);
1778
1779                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1780
1781                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1782                                 {
1783                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1784                                         {
1785                                                 return false;
1786                                         }
1787                                 }
1788
1789                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1790                         }
1791                 }
1792
1793                 return false;
1794         }
1795
1796         bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1797         {
1798                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1799                 const SetupProcessor::State &state = draw.setupState;
1800                 const DrawData &data = *draw.data;
1801
1802                 Vertex &v = triangle.v0;
1803
1804                 float pSize;
1805
1806                 int pts = state.pointSizeRegister;
1807
1808                 if(state.pointSizeRegister != 0xF)
1809                 {
1810                         pSize = v.v[pts].y;
1811                 }
1812                 else
1813                 {
1814                         pSize = data.point.pointSize[0];
1815                 }
1816
1817                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1818
1819                 float4 P[4];
1820                 int C[4];
1821
1822                 int pos = state.positionRegister;
1823
1824                 P[0] = v.v[pos];
1825                 P[1] = v.v[pos];
1826                 P[2] = v.v[pos];
1827                 P[3] = v.v[pos];
1828
1829                 const float X = pSize * P[0].w * data.halfPixelX[0];
1830                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1831
1832                 P[0].x -= X;
1833                 P[0].y += Y;
1834                 C[0] = computeClipFlags(P[0], data);
1835
1836                 P[1].x += X;
1837                 P[1].y += Y;
1838                 C[1] = computeClipFlags(P[1], data);
1839
1840                 P[2].x += X;
1841                 P[2].y -= Y;
1842                 C[2] = computeClipFlags(P[2], data);
1843
1844                 P[3].x -= X;
1845                 P[3].y -= Y;
1846                 C[3] = computeClipFlags(P[3], data);
1847
1848                 triangle.v1 = triangle.v0;
1849                 triangle.v2 = triangle.v0;
1850
1851                 triangle.v1.X += iround(16 * 0.5f * pSize);
1852                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1853
1854                 Polygon polygon(P, 4);
1855
1856                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1857                 {
1858                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1859
1860                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1861                         {
1862                                 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1863                                 {
1864                                         return false;
1865                                 }
1866                         }
1867                         
1868                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1869                 }
1870
1871                 return false;
1872         }
1873
1874         unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1875         {
1876                 return ((v.x > v.w)  << 0) |
1877                            ((v.y > v.w)  << 1) |
1878                            ((v.z > v.w)  << 2) |
1879                            ((v.x < -v.w) << 3) |
1880                        ((v.y < -v.w) << 4) |
1881                            ((v.z < 0)    << 5) |
1882                            Clipper::CLIP_FINITE;   // FIXME: xyz finite
1883         }
1884
1885         void Renderer::initializeThreads()
1886         {
1887                 unitCount = ceilPow2(threadCount);
1888                 clusterCount = ceilPow2(threadCount);
1889
1890                 for(int i = 0; i < unitCount; i++)
1891                 {
1892                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1893                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1894                 }
1895
1896                 for(int i = 0; i < threadCount; i++)
1897                 {
1898                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1899                         vertexTask[i]->vertexCache.drawCall = -1;
1900
1901                         task[i].type = Task::SUSPEND;
1902
1903                         resume[i] = new Event();
1904                         suspend[i] = new Event();
1905
1906                         Parameters parameters;
1907                         parameters.threadIndex = i;
1908                         parameters.renderer = this;
1909
1910                         exitThreads = false;
1911                         worker[i] = new Thread(threadFunction, &parameters);
1912
1913                         suspend[i]->wait();
1914                         suspend[i]->signal();
1915                 }
1916         }
1917
1918         void Renderer::terminateThreads()
1919         {
1920                 while(threadsAwake != 0)
1921                 {
1922                         Thread::sleep(1);
1923                 }
1924
1925                 for(int thread = 0; thread < threadCount; thread++)
1926                 {
1927                         if(worker[thread])
1928                         {
1929                                 exitThreads = true;
1930                                 resume[thread]->signal();
1931                                 worker[thread]->join();
1932                                 
1933                                 delete worker[thread];
1934                                 worker[thread] = 0;
1935                                 delete resume[thread];
1936                                 resume[thread] = 0;
1937                                 delete suspend[thread];
1938                                 suspend[thread] = 0;
1939                         }
1940                 
1941                         deallocate(vertexTask[thread]);
1942                         vertexTask[thread] = 0;
1943                 }
1944
1945                 for(int i = 0; i < 16; i++)
1946                 {
1947                         deallocate(triangleBatch[i]);
1948                         triangleBatch[i] = 0;
1949
1950                         deallocate(primitiveBatch[i]);
1951                         primitiveBatch[i] = 0;
1952                 }
1953         }
1954
1955         void Renderer::loadConstants(const VertexShader *vertexShader)
1956         {
1957                 if(!vertexShader) return;
1958
1959                 size_t count = vertexShader->getLength();
1960
1961                 for(size_t i = 0; i < count; i++)
1962                 {
1963                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
1964
1965                         if(instruction->opcode == Shader::OPCODE_DEF)
1966                         {
1967                                 int index = instruction->dst.index;
1968                                 float value[4];
1969
1970                                 value[0] = instruction->src[0].value[0];
1971                                 value[1] = instruction->src[0].value[1];
1972                                 value[2] = instruction->src[0].value[2];
1973                                 value[3] = instruction->src[0].value[3];
1974
1975                                 setVertexShaderConstantF(index, value);
1976                         }
1977                         else if(instruction->opcode == Shader::OPCODE_DEFI)
1978                         {
1979                                 int index = instruction->dst.index;
1980                                 int integer[4];
1981
1982                                 integer[0] = instruction->src[0].integer[0];
1983                                 integer[1] = instruction->src[0].integer[1];
1984                                 integer[2] = instruction->src[0].integer[2];
1985                                 integer[3] = instruction->src[0].integer[3];
1986
1987                                 setVertexShaderConstantI(index, integer);
1988                         }
1989                         else if(instruction->opcode == Shader::OPCODE_DEFB)
1990                         {
1991                                 int index = instruction->dst.index;
1992                                 int boolean = instruction->src[0].boolean[0];
1993
1994                                 setVertexShaderConstantB(index, &boolean);
1995                         }
1996                 }
1997         }
1998
1999         void Renderer::loadConstants(const PixelShader *pixelShader)
2000         {
2001                 if(!pixelShader) return;
2002
2003                 size_t count = pixelShader->getLength();
2004
2005                 for(size_t i = 0; i < count; i++)
2006                 {
2007                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2008
2009                         if(instruction->opcode == Shader::OPCODE_DEF)
2010                         {
2011                                 int index = instruction->dst.index;
2012                                 float value[4];
2013
2014                                 value[0] = instruction->src[0].value[0];
2015                                 value[1] = instruction->src[0].value[1];
2016                                 value[2] = instruction->src[0].value[2];
2017                                 value[3] = instruction->src[0].value[3];
2018
2019                                 setPixelShaderConstantF(index, value);
2020                         }
2021                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2022                         {
2023                                 int index = instruction->dst.index;
2024                                 int integer[4];
2025
2026                                 integer[0] = instruction->src[0].integer[0];
2027                                 integer[1] = instruction->src[0].integer[1];
2028                                 integer[2] = instruction->src[0].integer[2];
2029                                 integer[3] = instruction->src[0].integer[3];
2030
2031                                 setPixelShaderConstantI(index, integer);
2032                         }
2033                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2034                         {
2035                                 int index = instruction->dst.index;
2036                                 int boolean = instruction->src[0].boolean[0];
2037
2038                                 setPixelShaderConstantB(index, &boolean);
2039                         }
2040                 }
2041         }
2042
2043         void Renderer::setIndexBuffer(Resource *indexBuffer)
2044         {
2045                 context->indexBuffer = indexBuffer;
2046         }
2047
2048         void Renderer::setMultiSampleMask(unsigned int mask)
2049         {
2050                 context->sampleMask = mask;
2051         }
2052
2053         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2054         {
2055                 sw::transparencyAntialiasing = transparencyAntialiasing;
2056         }
2057
2058         bool Renderer::isReadWriteTexture(int sampler)
2059         {
2060                 for(int index = 0; index < RENDERTARGETS; index++)
2061                 {
2062                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2063                         {
2064                                 return true;
2065                         }
2066                 }
2067         
2068                 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
2069                 {
2070                         return true;
2071                 }
2072
2073                 return false;
2074         }
2075         
2076         void Renderer::updateClipper()
2077         {
2078                 if(updateClipPlanes)
2079                 {
2080                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2081                         {
2082                                 const Matrix &scissorWorld = getViewTransform();
2083
2084                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2085                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2086                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2087                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2088                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2089                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2090                         }
2091                         else   // User plane in clip space
2092                         {
2093                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2094                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2095                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2096                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2097                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2098                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2099                         }
2100
2101                         updateClipPlanes = false;
2102                 }
2103         }
2104
2105         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2106         {
2107                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2108
2109                 context->texture[sampler] = resource;
2110         }
2111
2112         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2113         {
2114                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2115                 
2116                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2117         }
2118
2119         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2120         {
2121                 if(type == SAMPLER_PIXEL)
2122                 {
2123                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2124                 }
2125                 else
2126                 {
2127                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2128                 }
2129         }
2130
2131         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2132         {
2133                 if(type == SAMPLER_PIXEL)
2134                 {
2135                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2136                 }
2137                 else
2138                 {
2139                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2140                 }
2141         }
2142
2143         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2144         {
2145                 if(type == SAMPLER_PIXEL)
2146                 {
2147                         PixelProcessor::setGatherEnable(sampler, enable);
2148                 }
2149                 else
2150                 {
2151                         VertexProcessor::setGatherEnable(sampler, enable);
2152                 }
2153         }
2154
2155         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2156         {
2157                 if(type == SAMPLER_PIXEL)
2158                 {
2159                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2160                 }
2161                 else
2162                 {
2163                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2164                 }
2165         }
2166
2167         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2168         {
2169                 if(type == SAMPLER_PIXEL)
2170                 {
2171                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2172                 }
2173                 else
2174                 {
2175                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2176                 }
2177         }
2178
2179         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2180         {
2181                 if(type == SAMPLER_PIXEL)
2182                 {
2183                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2184                 }
2185                 else
2186                 {
2187                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2188                 }
2189         }
2190
2191         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2192         {
2193                 if(type == SAMPLER_PIXEL)
2194                 {
2195                         PixelProcessor::setReadSRGB(sampler, sRGB);
2196                 }
2197                 else
2198                 {
2199                         VertexProcessor::setReadSRGB(sampler, sRGB);
2200                 }
2201         }
2202
2203         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2204         {
2205                 if(type == SAMPLER_PIXEL)
2206                 {
2207                         PixelProcessor::setMipmapLOD(sampler, bias);
2208                 }
2209                 else
2210                 {
2211                         VertexProcessor::setMipmapLOD(sampler, bias);
2212                 }
2213         }
2214
2215         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2216         {
2217                 if(type == SAMPLER_PIXEL)
2218                 {
2219                         PixelProcessor::setBorderColor(sampler, borderColor);
2220                 }
2221                 else
2222                 {
2223                         VertexProcessor::setBorderColor(sampler, borderColor);
2224                 }
2225         }
2226
2227         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2228         {
2229                 if(type == SAMPLER_PIXEL)
2230                 {
2231                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2232                 }
2233                 else
2234                 {
2235                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2236                 }
2237         }
2238
2239         void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2240         {
2241                 if(type == SAMPLER_PIXEL)
2242                 {
2243                         PixelProcessor::setSwizzleR(sampler, swizzleR);
2244                 }
2245                 else
2246                 {
2247                         VertexProcessor::setSwizzleR(sampler, swizzleR);
2248                 }
2249         }
2250
2251         void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2252         {
2253                 if(type == SAMPLER_PIXEL)
2254                 {
2255                         PixelProcessor::setSwizzleG(sampler, swizzleG);
2256                 }
2257                 else
2258                 {
2259                         VertexProcessor::setSwizzleG(sampler, swizzleG);
2260                 }
2261         }
2262
2263         void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2264         {
2265                 if(type == SAMPLER_PIXEL)
2266                 {
2267                         PixelProcessor::setSwizzleB(sampler, swizzleB);
2268                 }
2269                 else
2270                 {
2271                         VertexProcessor::setSwizzleB(sampler, swizzleB);
2272                 }
2273         }
2274
2275         void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2276         {
2277                 if(type == SAMPLER_PIXEL)
2278                 {
2279                         PixelProcessor::setSwizzleA(sampler, swizzleA);
2280                 }
2281                 else
2282                 {
2283                         VertexProcessor::setSwizzleA(sampler, swizzleA);
2284                 }
2285         }
2286
2287         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2288         {
2289                 context->setPointSpriteEnable(pointSpriteEnable);
2290         }
2291
2292         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2293         {
2294                 context->setPointScaleEnable(pointScaleEnable);
2295         }
2296
2297         void Renderer::setLineWidth(float width)
2298         {
2299                 context->lineWidth = width;
2300         }
2301
2302         void Renderer::setDepthBias(float bias)
2303         {
2304                 depthBias = bias;
2305         }
2306
2307         void Renderer::setSlopeDepthBias(float slopeBias)
2308         {
2309                 slopeDepthBias = slopeBias;
2310         }
2311
2312         void Renderer::setPixelShader(const PixelShader *shader)
2313         {
2314                 context->pixelShader = shader;
2315
2316                 loadConstants(shader);
2317         }
2318
2319         void Renderer::setVertexShader(const VertexShader *shader)
2320         {
2321                 context->vertexShader = shader;
2322
2323                 loadConstants(shader);
2324         }
2325
2326         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2327         {
2328                 for(int i = 0; i < DRAW_COUNT; i++)
2329                 {
2330                         if(drawCall[i]->psDirtyConstF < index + count)
2331                         {
2332                                 drawCall[i]->psDirtyConstF = index + count;
2333                         }
2334                 }
2335
2336                 for(int i = 0; i < count; i++)
2337                 {
2338                         PixelProcessor::setFloatConstant(index + i, value);
2339                         value += 4;
2340                 }
2341         }
2342
2343         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2344         {
2345                 for(int i = 0; i < DRAW_COUNT; i++)
2346                 {
2347                         if(drawCall[i]->psDirtyConstI < index + count)
2348                         {
2349                                 drawCall[i]->psDirtyConstI = index + count;
2350                         }
2351                 }
2352
2353                 for(int i = 0; i < count; i++)
2354                 {
2355                         PixelProcessor::setIntegerConstant(index + i, value);
2356                         value += 4;
2357                 }
2358         }
2359
2360         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2361         {
2362                 for(int i = 0; i < DRAW_COUNT; i++)
2363                 {
2364                         if(drawCall[i]->psDirtyConstB < index + count)
2365                         {
2366                                 drawCall[i]->psDirtyConstB = index + count;
2367                         }
2368                 }
2369
2370                 for(int i = 0; i < count; i++)
2371                 {
2372                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2373                         boolean++;
2374                 }
2375         }
2376
2377         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2378         {
2379                 for(int i = 0; i < DRAW_COUNT; i++)
2380                 {
2381                         if(drawCall[i]->vsDirtyConstF < index + count)
2382                         {
2383                                 drawCall[i]->vsDirtyConstF = index + count;
2384                         }
2385                 }
2386
2387                 for(int i = 0; i < count; i++)
2388                 {
2389                         VertexProcessor::setFloatConstant(index + i, value);
2390                         value += 4;
2391                 }
2392         }
2393
2394         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2395         {
2396                 for(int i = 0; i < DRAW_COUNT; i++)
2397                 {
2398                         if(drawCall[i]->vsDirtyConstI < index + count)
2399                         {
2400                                 drawCall[i]->vsDirtyConstI = index + count;
2401                         }
2402                 }
2403
2404                 for(int i = 0; i < count; i++)
2405                 {
2406                         VertexProcessor::setIntegerConstant(index + i, value);
2407                         value += 4;
2408                 }
2409         }
2410
2411         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2412         {
2413                 for(int i = 0; i < DRAW_COUNT; i++)
2414                 {
2415                         if(drawCall[i]->vsDirtyConstB < index + count)
2416                         {
2417                                 drawCall[i]->vsDirtyConstB = index + count;
2418                         }
2419                 }
2420
2421                 for(int i = 0; i < count; i++)
2422                 {
2423                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2424                         boolean++;
2425                 }
2426         }
2427
2428         void Renderer::setModelMatrix(const Matrix &M, int i)
2429         {
2430                 VertexProcessor::setModelMatrix(M, i);
2431         }
2432
2433         void Renderer::setViewMatrix(const Matrix &V)
2434         {
2435                 VertexProcessor::setViewMatrix(V);
2436                 updateClipPlanes = true;
2437         }
2438
2439         void Renderer::setBaseMatrix(const Matrix &B)
2440         {
2441                 VertexProcessor::setBaseMatrix(B);
2442                 updateClipPlanes = true;
2443         }
2444
2445         void Renderer::setProjectionMatrix(const Matrix &P)
2446         {
2447                 VertexProcessor::setProjectionMatrix(P);
2448                 updateClipPlanes = true;
2449         }
2450
2451         void Renderer::addQuery(Query *query)
2452         {
2453                 queries.push_back(query);
2454         }
2455         
2456         void Renderer::removeQuery(Query *query)
2457         {
2458                 queries.remove(query);
2459         }
2460
2461         #if PERF_HUD
2462                 int Renderer::getThreadCount()
2463                 {
2464                         return threadCount;
2465                 }
2466                 
2467                 int64_t Renderer::getVertexTime(int thread)
2468                 {
2469                         return vertexTime[thread];
2470                 }
2471
2472                 int64_t Renderer::getSetupTime(int thread)
2473                 {
2474                         return setupTime[thread];
2475                 }
2476                         
2477                 int64_t Renderer::getPixelTime(int thread)
2478                 {
2479                         return pixelTime[thread];
2480                 }
2481
2482                 void Renderer::resetTimers()
2483                 {
2484                         for(int thread = 0; thread < threadCount; thread++)
2485                         {
2486                                 vertexTime[thread] = 0;
2487                                 setupTime[thread] = 0;
2488                                 pixelTime[thread] = 0;
2489                         }
2490                 }
2491         #endif
2492
2493         void Renderer::setViewport(const Viewport &viewport)
2494         {
2495                 this->viewport = viewport;
2496         }
2497
2498         void Renderer::setScissor(const Rect &scissor)
2499         {
2500                 this->scissor = scissor;
2501         }
2502
2503         void Renderer::setClipFlags(int flags)
2504         {
2505                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2506         }
2507
2508         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2509         {
2510                 if(index < MAX_CLIP_PLANES)
2511                 {
2512                         userPlane[index] = plane;
2513                 }
2514                 else ASSERT(false);
2515
2516                 updateClipPlanes = true;
2517         }
2518
2519         void Renderer::updateConfiguration(bool initialUpdate)
2520         {
2521                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2522
2523                 if(newConfiguration || initialUpdate)
2524                 {
2525                         terminateThreads();
2526
2527                         SwiftConfig::Configuration configuration = {0};
2528                         swiftConfig->getConfiguration(configuration);
2529
2530                         precacheVertex = !newConfiguration && configuration.precache;
2531                         precacheSetup = !newConfiguration && configuration.precache;
2532                         precachePixel = !newConfiguration && configuration.precache;
2533
2534                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2535                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2536                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2537
2538                         switch(configuration.textureSampleQuality)
2539                         {
2540                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2541                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2542                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2543                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2544                         }
2545
2546                         switch(configuration.mipmapQuality)
2547                         {
2548                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2549                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2550                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2551                         }
2552
2553                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2554
2555                         switch(configuration.transcendentalPrecision)
2556                         {
2557                         case 0:
2558                                 logPrecision = APPROXIMATE;
2559                                 expPrecision = APPROXIMATE;
2560                                 rcpPrecision = APPROXIMATE;
2561                                 rsqPrecision = APPROXIMATE;
2562                                 break;
2563                         case 1:
2564                                 logPrecision = PARTIAL;
2565                                 expPrecision = PARTIAL;
2566                                 rcpPrecision = PARTIAL;
2567                                 rsqPrecision = PARTIAL;
2568                                 break;
2569                         case 2:
2570                                 logPrecision = ACCURATE;
2571                                 expPrecision = ACCURATE;
2572                                 rcpPrecision = ACCURATE;
2573                                 rsqPrecision = ACCURATE;
2574                                 break;
2575                         case 3:
2576                                 logPrecision = WHQL;
2577                                 expPrecision = WHQL;
2578                                 rcpPrecision = WHQL;
2579                                 rsqPrecision = WHQL;
2580                                 break;
2581                         case 4:
2582                                 logPrecision = IEEE;
2583                                 expPrecision = IEEE;
2584                                 rcpPrecision = IEEE;
2585                                 rsqPrecision = IEEE;
2586                                 break;
2587                         default:
2588                                 logPrecision = ACCURATE;
2589                                 expPrecision = ACCURATE;
2590                                 rcpPrecision = ACCURATE;
2591                                 rsqPrecision = ACCURATE;
2592                                 break;
2593                         }
2594
2595                         switch(configuration.transparencyAntialiasing)
2596                         {
2597                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2598                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2599                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2600                         }
2601
2602                         switch(configuration.threadCount)
2603                         {
2604                         case -1: threadCount = CPUID::coreCount();        break;
2605                         case 0:  threadCount = CPUID::processAffinity();  break;
2606                         default: threadCount = configuration.threadCount; break;
2607                         }
2608
2609                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2610                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2611                         CPUID::setEnableSSE3(configuration.enableSSE3);
2612                         CPUID::setEnableSSE2(configuration.enableSSE2);
2613                         CPUID::setEnableSSE(configuration.enableSSE);
2614
2615                         for(int pass = 0; pass < 10; pass++)
2616                         {
2617                                 optimization[pass] = configuration.optimization[pass];
2618                         }
2619
2620                         forceWindowed = configuration.forceWindowed;
2621                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2622                         postBlendSRGB = configuration.postBlendSRGB;
2623                         exactColorRounding = configuration.exactColorRounding;
2624                         forceClearRegisters = configuration.forceClearRegisters;
2625
2626                 #ifndef NDEBUG
2627                         minPrimitives = configuration.minPrimitives;
2628                         maxPrimitives = configuration.maxPrimitives;
2629                 #endif
2630                 }
2631
2632                 if(!initialUpdate && !worker[0])
2633                 {
2634                         initializeThreads();
2635                 }
2636         }
2637 }