OSDN Git Service

Fix signed/unsigned comparison warnings.
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2012 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "Renderer.hpp"
13
14 #include "Clipper.hpp"
15 #include "Math.hpp"
16 #include "FrameBuffer.hpp"
17 #include "Timer.hpp"
18 #include "Surface.hpp"
19 #include "Half.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
24 #include "CPUID.hpp"
25 #include "Memory.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
28 #include "Debug.hpp"
29 #include "Reactor/Reactor.hpp"
30
31 #undef max
32
// Handed to the SwiftConfig constructor when a Renderer is created.
bool disableServer = true;

#if !defined(NDEBUG)
// Debug builds only: Renderer::draw() returns immediately when the primitive
// count lies outside the inclusive range [minPrimitives, maxPrimitives].
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
39
40 namespace sw
41 {
// Renderer-wide flags declared here; their definitions are not in the visible
// part of this file. The first six, plus exactColorRounding, are assigned by
// the Renderer constructor from its arguments.
extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
extern bool booleanFaceRegister;
extern bool fullPixelPositionRegister;
extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
extern bool secondaryColor;             // Specular lighting is applied after texturing

extern bool forceWindowed;
extern bool complementaryDepthBuffer;   // When set, draw() negates the depth range and uses 1 - near
extern bool postBlendSRGB;
extern bool exactColorRounding;
extern TransparencyAntialiasing transparencyAntialiasing;
extern bool forceClearRegisters;

// NOTE(review): presumably control ahead-of-time routine caching - confirm at the definition.
extern bool precacheVertex;
extern bool precacheSetup;
extern bool precachePixel;
59
// Work distribution defaults.
int batchSize = 128;     // draw() divides this by the multisample count to get the per-draw batch size
int threadCount = 1;     // draw() executes tasks on the calling thread unless this is greater than 1
int unitCount = 1;       // Upper bound of the primitive unit loop in findAvailableTasks()
int clusterCount = 1;    // Upper bound of the per-cluster loops in draw() and findAvailableTasks()

// Precision selection per operation; threadFunction() enables flush-to-zero and
// denormals-are-zero when logPrecision is below IEEE.
TranscendentalPrecision logPrecision = ACCURATE;
TranscendentalPrecision expPrecision = ACCURATE;
TranscendentalPrecision rcpPrecision = ACCURATE;
TranscendentalPrecision rsqPrecision = ACCURATE;
bool perspectiveCorrection = true;
70
// Argument bundle that Renderer::threadFunction() receives through its void pointer.
struct Parameters
{
    Renderer *renderer;   // Renderer whose threadLoop() is run
    int threadIndex;      // Index forwarded to threadLoop()
};
76
77         DrawCall::DrawCall()
78         {
79                 queries = 0;
80
81                 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
82                 vsDirtyConstI = 16;
83                 vsDirtyConstB = 16;
84
85                 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
86                 psDirtyConstI = 16;
87                 psDirtyConstB = 16;
88
89                 references = -1;
90
91                 data = (DrawData*)allocate(sizeof(DrawData));
92                 data->constants = &constants;
93         }
94
// Releases the query list copy (if one is attached) and the DrawData block.
DrawCall::~DrawCall()
{
    // Deleting a null pointer is a no-op, so an unused slot is fine here.
    delete queries;

    // Pairs with the allocate() call in the constructor.
    deallocate(data);
}
101
102         Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
103         {
104                 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
105                 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
106                 sw::booleanFaceRegister = conventions.booleanFaceRegister;
107                 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
108                 sw::leadingVertexFirst = conventions.leadingVertexFirst;
109                 sw::secondaryColor = conventions.secondaryColor;
110                 sw::exactColorRounding = exactColorRounding;
111
112                 setRenderTarget(0, 0);
113                 clipper = new Clipper();
114
115                 updateViewMatrix = true;
116                 updateBaseMatrix = true;
117                 updateProjectionMatrix = true;
118                 updateClipPlanes = true;
119
120                 #if PERF_HUD
121                         resetTimers();
122                 #endif
123
124                 for(int i = 0; i < 16; i++)
125                 {
126                         vertexTask[i] = 0;
127
128                         worker[i] = 0;
129                         resume[i] = 0;
130                         suspend[i] = 0;
131                 }
132
133                 threadsAwake = 0;
134                 resumeApp = new Event();
135
136                 currentDraw = 0;
137                 nextDraw = 0;
138
139                 qHead = 0;
140                 qSize = 0;
141
142                 for(int i = 0; i < 16; i++)
143                 {
144                         triangleBatch[i] = 0;
145                         primitiveBatch[i] = 0;
146                 }
147
148                 for(int draw = 0; draw < DRAW_COUNT; draw++)
149                 {
150                         drawCall[draw] = new DrawCall();
151                         drawList[draw] = drawCall[draw];
152                 }
153
154                 for(int unit = 0; unit < 16; unit++)
155                 {
156                         primitiveProgress[unit].init();
157                 }
158
159                 for(int cluster = 0; cluster < 16; cluster++)
160                 {
161                         pixelProgress[cluster].init();
162                 }
163
164                 clipFlags = 0;
165
166                 swiftConfig = new SwiftConfig(disableServer);
167                 updateConfiguration(true);
168
169                 sync = new Resource(0);
170         }
171
172         Renderer::~Renderer()
173         {
174                 sync->destruct();
175
176                 delete clipper;
177                 clipper = 0;
178
179                 terminateThreads();
180                 delete resumeApp;
181
182                 for(int draw = 0; draw < DRAW_COUNT; draw++)
183                 {
184                         delete drawCall[draw];
185                 }
186
187                 delete swiftConfig;
188         }
189
// Forwards a clear request unchanged to the blitter.
void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
{
    blitter.clear(pixel, format, dest, dRect, rgbaMask);
}
194
// Forwards a rectangle-to-rectangle blit unchanged to the blitter.
void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
{
    blitter.blit(source, sRect, dest, dRect, filter);
}
199
// Forwards a whole-surface 3D blit unchanged to the blitter.
void Renderer::blit3D(Surface *source, Surface *dest)
{
    blitter.blit3D(source, dest);
}
204
205         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
206         {
207                 #ifndef NDEBUG
208                         if(count < minPrimitives || count > maxPrimitives)
209                         {
210                                 return;
211                         }
212                 #endif
213
214                 context->drawType = drawType;
215
216                 updateConfiguration();
217                 updateClipper();
218
219                 int ss = context->getSuperSampleCount();
220                 int ms = context->getMultiSampleCount();
221
222                 for(int q = 0; q < ss; q++)
223                 {
224                         unsigned int oldMultiSampleMask = context->multiSampleMask;
225                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
226
227                         if(!context->multiSampleMask)
228                         {
229                                 continue;
230                         }
231
232                         sync->lock(sw::PRIVATE);
233
234                         Routine *vertexRoutine;
235                         Routine *setupRoutine;
236                         Routine *pixelRoutine;
237
238                         if(update || oldMultiSampleMask != context->multiSampleMask)
239                         {
240                                 vertexState = VertexProcessor::update();
241                                 setupState = SetupProcessor::update();
242                                 pixelState = PixelProcessor::update();
243
244                                 vertexRoutine = VertexProcessor::routine(vertexState);
245                                 setupRoutine = SetupProcessor::routine(setupState);
246                                 pixelRoutine = PixelProcessor::routine(pixelState);
247                         }
248
249                         int batch = batchSize / ms;
250
251                         int (*setupPrimitives)(Renderer *renderer, int batch, int count);
252
253                         if(context->isDrawTriangle())
254                         {
255                                 switch(context->fillMode)
256                                 {
257                                 case FILL_SOLID:
258                                         setupPrimitives = setupSolidTriangles;
259                                         break;
260                                 case FILL_WIREFRAME:
261                                         setupPrimitives = setupWireframeTriangle;
262                                         batch = 1;
263                                         break;
264                                 case FILL_VERTEX:
265                                         setupPrimitives = setupVertexTriangle;
266                                         batch = 1;
267                                         break;
268                                 default: ASSERT(false);
269                                 }
270                         }
271                         else if(context->isDrawLine())
272                         {
273                                 setupPrimitives = setupLines;
274                         }
275                         else   // Point draw
276                         {
277                                 setupPrimitives = setupPoints;
278                         }
279
280                         DrawCall *draw = 0;
281
282                         do
283                         {
284                                 for(int i = 0; i < DRAW_COUNT; i++)
285                                 {
286                                         if(drawCall[i]->references == -1)
287                                         {
288                                                 draw = drawCall[i];
289                                                 drawList[nextDraw % DRAW_COUNT] = draw;
290
291                                                 break;
292                                         }
293                                 }
294
295                                 if(!draw)
296                                 {
297                                         resumeApp->wait();
298                                 }
299                         }
300                         while(!draw);
301
302                         DrawData *data = draw->data;
303
304                         if(queries.size() != 0)
305                         {
306                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
307                                 {
308                                         atomicIncrement(&(*query)->reference);
309                                 }
310
311                                 draw->queries = new std::list<Query*>(queries);
312                         }
313
314                         draw->drawType = drawType;
315                         draw->batchSize = batch;
316
317                         vertexRoutine->bind();
318                         setupRoutine->bind();
319                         pixelRoutine->bind();
320
321                         draw->vertexRoutine = vertexRoutine;
322                         draw->setupRoutine = setupRoutine;
323                         draw->pixelRoutine = pixelRoutine;
324                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
325                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
326                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
327                         draw->setupPrimitives = setupPrimitives;
328                         draw->setupState = setupState;
329
330                         for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
331                         {
332                                 draw->vertexStream[i] = context->input[i].resource;
333                                 data->input[i] = context->input[i].buffer;
334                                 data->stride[i] = context->input[i].stride;
335
336                                 if(draw->vertexStream[i])
337                                 {
338                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
339                                 }
340                         }
341
342                         if(context->indexBuffer)
343                         {
344                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
345                         }
346
347                         draw->indexBuffer = context->indexBuffer;
348
349                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
350                         {
351                                 draw->texture[sampler] = 0;
352                         }
353
354                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
355                         {
356                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
357                                 {
358                                         draw->texture[sampler] = context->texture[sampler];
359                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
360
361                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
362                                 }
363                         }
364
365                         if(context->pixelShader)
366                         {
367                                 if(draw->psDirtyConstF)
368                                 {
369                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
370                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
371                                         draw->psDirtyConstF = 0;
372                                 }
373
374                                 if(draw->psDirtyConstI)
375                                 {
376                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
377                                         draw->psDirtyConstI = 0;
378                                 }
379
380                                 if(draw->psDirtyConstB)
381                                 {
382                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
383                                         draw->psDirtyConstB = 0;
384                                 }
385                         }
386                         
387                         if(context->pixelShaderVersion() <= 0x0104)
388                         {
389                                 for(int stage = 0; stage < 8; stage++)
390                                 {
391                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
392                                         {
393                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
394                                         }
395                                         else break;
396                                 }
397                         }
398
399                         if(context->vertexShader)
400                         {
401                                 if(context->vertexShader->getVersion() >= 0x0300)
402                                 {
403                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
404                                         {
405                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
406                                                 {
407                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
408                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
409
410                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
411                                                 }
412                                         }
413                                 }
414
415                                 if(draw->vsDirtyConstF)
416                                 {
417                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
418                                         draw->vsDirtyConstF = 0;
419                                 }
420
421                                 if(draw->vsDirtyConstI)
422                                 {
423                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
424                                         draw->vsDirtyConstI = 0;
425                                 }
426
427                                 if(draw->vsDirtyConstB)
428                                 {
429                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
430                                         draw->vsDirtyConstB = 0;
431                                 }
432
433                                 if(context->vertexShader->instanceIdDeclared)
434                                 {
435                                         data->instanceID = context->instanceID;
436                                 }
437                         }
438                         else
439                         {
440                                 data->ff = ff;
441
442                                 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
443                                 draw->vsDirtyConstI = 16;
444                                 draw->vsDirtyConstB = 16;
445                         }
446
447                         if(pixelState.stencilActive)
448                         {
449                                 data->stencil[0] = stencil;
450                                 data->stencil[1] = stencilCCW;
451                         }
452
453                         if(pixelState.fogActive)
454                         {
455                                 data->fog = fog;
456                         }
457
458                         if(setupState.isDrawPoint)
459                         {
460                                 data->point = point;
461                         }
462
463                         data->lineWidth = context->lineWidth;
464
465                         data->factor = factor;
466
467                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
468                         {
469                                 float ref = context->alphaReference * (1.0f / 255.0f);
470                                 float margin = sw::min(ref, 1.0f - ref);
471
472                                 if(ms == 4)
473                                 {
474                                         data->a2c0 = replicate(ref - margin * 0.6f);
475                                         data->a2c1 = replicate(ref - margin * 0.2f);
476                                         data->a2c2 = replicate(ref + margin * 0.2f);
477                                         data->a2c3 = replicate(ref + margin * 0.6f);
478                                 }
479                                 else if(ms == 2)
480                                 {
481                                         data->a2c0 = replicate(ref - margin * 0.3f);
482                                         data->a2c1 = replicate(ref + margin * 0.3f);
483                                 }
484                                 else ASSERT(false);
485                         }
486
487                         if(pixelState.occlusionEnabled)
488                         {
489                                 for(int cluster = 0; cluster < clusterCount; cluster++)
490                                 {
491                                         data->occlusion[cluster] = 0;
492                                 }
493                         }
494
495                         #if PERF_PROFILE
496                                 for(int cluster = 0; cluster < clusterCount; cluster++)
497                                 {
498                                         for(int i = 0; i < PERF_TIMERS; i++)
499                                         {
500                                                 data->cycles[i][cluster] = 0;
501                                         }
502                                 }
503                         #endif
504
505                         // Viewport
506                         {
507                                 float W = 0.5f * viewport.width;
508                                 float H = 0.5f * viewport.height;
509                                 float X0 = viewport.x0 + W;
510                                 float Y0 = viewport.y0 + H;
511                                 float N = viewport.minZ;
512                                 float F = viewport.maxZ;
513                                 float Z = F - N;
514
515                                 if(context->isDrawTriangle(false))
516                                 {
517                                         N += depthBias;
518                                 }
519
520                                 if(complementaryDepthBuffer)
521                                 {
522                                         Z = -Z;
523                                         N = 1 - N;
524                                 }
525
526                                 static const float X[5][16] =   // Fragment offsets
527                                 {
528                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
529                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
530                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
531                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
532                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
533                                 };
534
535                                 static const float Y[5][16] =   // Fragment offsets
536                                 {
537                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
538                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
539                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
540                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
541                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
542                                 };
543
544                                 int s = sw::log2(ss);
545
546                                 data->Wx16 = replicate(W * 16);
547                                 data->Hx16 = replicate(H * 16);
548                                 data->X0x16 = replicate(X0 * 16 - 8);
549                                 data->Y0x16 = replicate(Y0 * 16 - 8);
550                                 data->XXXX = replicate(X[s][q] / W);
551                                 data->YYYY = replicate(Y[s][q] / H);
552                                 data->halfPixelX = replicate(0.5f / W);
553                                 data->halfPixelY = replicate(0.5f / H);
554                                 data->viewportHeight = abs(viewport.height);
555                                 data->slopeDepthBias = slopeDepthBias;
556                                 data->depthRange = Z;
557                                 data->depthNear = N;
558                                 draw->clipFlags = clipFlags;
559
560                                 if(clipFlags)
561                                 {
562                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
563                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
564                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
565                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
566                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
567                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
568                                 }
569                         }
570
571                         // Target
572                         {
573                                 for(int index = 0; index < RENDERTARGETS; index++)
574                                 {
575                                         draw->renderTarget[index] = context->renderTarget[index];
576
577                                         if(draw->renderTarget[index])
578                                         {
579                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
580                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
581                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
582                                         }
583                                 }
584
585                                 draw->depthStencil = context->depthStencil;
586
587                                 if(draw->depthStencil)
588                                 {
589                                         data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
590                                         data->depthPitchB = context->depthStencil->getInternalPitchB();
591                                         data->depthSliceB = context->depthStencil->getInternalSliceB();
592
593                                         data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
594                                         data->stencilPitchB = context->depthStencil->getStencilPitchB();
595                                         data->stencilSliceB = context->depthStencil->getStencilSliceB();
596                                 }
597                         }
598
599                         // Scissor
600                         {
601                                 data->scissorX0 = scissor.x0;
602                                 data->scissorX1 = scissor.x1;
603                                 data->scissorY0 = scissor.y0;
604                                 data->scissorY1 = scissor.y1;
605                         }
606
607                         draw->primitive = 0;
608                         draw->count = count;
609
610                         draw->references = (count + batch - 1) / batch;
611
612                         schedulerMutex.lock();
613                         nextDraw++;
614                         schedulerMutex.unlock();
615
616                         if(threadCount > 1)
617                         {
618                                 if(!threadsAwake)
619                                 {
620                                         suspend[0]->wait();
621
622                                         threadsAwake = 1;
623                                         task[0].type = Task::RESUME;
624
625                                         resume[0]->signal();
626                                 }
627                         }
628                         else   // Use main thread for draw execution
629                         {
630                                 threadsAwake = 1;
631                                 task[0].type = Task::RESUME;
632
633                                 taskLoop(0);
634                         }
635                 }
636         }
637
638         void Renderer::threadFunction(void *parameters)
639         {
640                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
641                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
642
643                 if(logPrecision < IEEE)
644                 {
645                         CPUID::setFlushToZero(true);
646                         CPUID::setDenormalsAreZero(true);
647                 }
648
649                 renderer->threadLoop(threadIndex);
650         }
651
652         void Renderer::threadLoop(int threadIndex)
653         {
654                 while(!exitThreads)
655                 {
656                         taskLoop(threadIndex);
657
658                         suspend[threadIndex]->signal();
659                         resume[threadIndex]->wait();
660                 }
661         }
662
663         void Renderer::taskLoop(int threadIndex)
664         {
665                 while(task[threadIndex].type != Task::SUSPEND)
666                 {
667                         scheduleTask(threadIndex);
668                         executeTask(threadIndex);
669                 }
670         }
671
672         void Renderer::findAvailableTasks()
673         {
674                 // Find pixel tasks
675                 for(int cluster = 0; cluster < clusterCount; cluster++)
676                 {
677                         if(!pixelProgress[cluster].executing)
678                         {
679                                 for(int unit = 0; unit < unitCount; unit++)
680                                 {
681                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
682                                         {
683                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
684                                                 {
685                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
686                                                         {
687                                                                 Task &task = taskQueue[qHead];
688                                                                 task.type = Task::PIXELS;
689                                                                 task.primitiveUnit = unit;
690                                                                 task.pixelCluster = cluster;
691
692                                                                 pixelProgress[cluster].executing = true;
693
694                                                                 // Commit to the task queue
695                                                                 qHead = (qHead + 1) % 32;
696                                                                 qSize++;
697
698                                                                 break;
699                                                         }
700                                                 }
701                                         }
702                                 }
703                         }
704                 }
705         
706                 // Find primitive tasks
707                 if(currentDraw == nextDraw)
708                 {
709                         return;   // No more primitives to process
710                 }
711
712                 for(int unit = 0; unit < unitCount; unit++)
713                 {
714                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
715
716                         if(draw->primitive >= draw->count)
717                         {
718                                 currentDraw++;
719
720                                 if(currentDraw == nextDraw)
721                                 {
722                                         return;   // No more primitives to process
723                                 }
724
725                                 draw = drawList[currentDraw % DRAW_COUNT];
726                         }
727
728                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
729                         {
730                                 int primitive = draw->primitive;
731                                 int count = draw->count;
732                                 int batch = draw->batchSize;
733
734                                 primitiveProgress[unit].drawCall = currentDraw;
735                                 primitiveProgress[unit].firstPrimitive = primitive;
736                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
737
738                                 draw->primitive += batch;
739
740                                 Task &task = taskQueue[qHead];
741                                 task.type = Task::PRIMITIVES;
742                                 task.primitiveUnit = unit;
743
744                                 primitiveProgress[unit].references = -1;
745
746                                 // Commit to the task queue
747                                 qHead = (qHead + 1) % 32;
748                                 qSize++;
749                         }
750                 }
751         }
752
753         void Renderer::scheduleTask(int threadIndex)
754         {
755                 schedulerMutex.lock();
756
757                 if((int)qSize < threadCount - threadsAwake + 1)
758                 {
759                         findAvailableTasks();
760                 }
761
762                 if(qSize != 0)
763                 {
764                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
765                         qSize--;
766
767                         if(threadsAwake != threadCount)
768                         {
769                                 int wakeup = qSize - threadsAwake + 1;
770
771                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
772                                 {
773                                         if(task[i].type == Task::SUSPEND)
774                                         {
775                                                 suspend[i]->wait();
776                                                 task[i].type = Task::RESUME;
777                                                 resume[i]->signal();
778
779                                                 threadsAwake++;
780                                                 wakeup--;
781                                         }
782                                 }
783                         }
784                 }
785                 else
786                 {
787                         task[threadIndex].type = Task::SUSPEND;
788
789                         threadsAwake--;
790                 }
791
792                 schedulerMutex.unlock();
793         }
794
795         void Renderer::executeTask(int threadIndex)
796         {
797                 #if PERF_HUD
798                         int64_t startTick = Timer::ticks();
799                 #endif
800
801                 switch(task[threadIndex].type)
802                 {
803                 case Task::PRIMITIVES:
804                         {
805                                 int unit = task[threadIndex].primitiveUnit;
806                                 
807                                 int input = primitiveProgress[unit].firstPrimitive;
808                                 int count = primitiveProgress[unit].primitiveCount;
809                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
810                                 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
811
812                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
813
814                                 #if PERF_HUD
815                                         int64_t time = Timer::ticks();
816                                         vertexTime[threadIndex] += time - startTick;
817                                         startTick = time;
818                                 #endif
819
820                                 int visible = setupPrimitives(this, unit, count);
821
822                                 primitiveProgress[unit].visible = visible;
823                                 primitiveProgress[unit].references = clusterCount;
824
825                                 #if PERF_HUD
826                                         setupTime[threadIndex] += Timer::ticks() - startTick;
827                                 #endif
828                         }
829                         break;
830                 case Task::PIXELS:
831                         {
832                                 int unit = task[threadIndex].primitiveUnit;
833                                 int visible = primitiveProgress[unit].visible;
834
835                                 if(visible > 0)
836                                 {
837                                         int cluster = task[threadIndex].pixelCluster;
838                                         Primitive *primitive = primitiveBatch[unit];
839                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
840                                         DrawData *data = draw->data;
841                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
842
843                                         pixelRoutine(primitive, visible, cluster, data);
844                                 }
845
846                                 finishRendering(task[threadIndex]);
847
848                                 #if PERF_HUD
849                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
850                                 #endif
851                         }
852                         break;
853                 case Task::RESUME:
854                         break;
855                 case Task::SUSPEND:
856                         break;
857                 default:
858                         ASSERT(false);
859                 }
860         }
861
862         void Renderer::synchronize()
863         {
864                 sync->lock(sw::PUBLIC);
865                 sync->unlock();
866         }
867
868         void Renderer::finishRendering(Task &pixelTask)
869         {
870                 int unit = pixelTask.primitiveUnit;
871                 int cluster = pixelTask.pixelCluster;
872
873                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
874                 DrawData &data = *draw.data;
875                 int primitive = primitiveProgress[unit].firstPrimitive;
876                 int count = primitiveProgress[unit].primitiveCount;
877
878                 pixelProgress[cluster].processedPrimitives = primitive + count;
879
880                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
881                 {
882                         pixelProgress[cluster].drawCall++;
883                         pixelProgress[cluster].processedPrimitives = 0;
884                 }
885
886                 int ref = atomicDecrement(&primitiveProgress[unit].references);
887
888                 if(ref == 0)
889                 {
890                         ref = atomicDecrement(&draw.references);
891
892                         if(ref == 0)
893                         {
894                                 #if PERF_PROFILE
895                                         for(int cluster = 0; cluster < clusterCount; cluster++)
896                                         {
897                                                 for(int i = 0; i < PERF_TIMERS; i++)
898                                                 {
899                                                         profiler.cycles[i] += data.cycles[i][cluster];
900                                                 }
901                                         }
902                                 #endif
903
904                                 if(draw.queries)
905                                 {
906                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
907                                         {
908                                                 Query *query = *q;
909
910                                                 for(int cluster = 0; cluster < clusterCount; cluster++)
911                                                 {
912                                                         atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
913                                                 }
914
915                                                 atomicDecrement(&query->reference);
916                                         }
917
918                                         delete draw.queries;
919                                         draw.queries = 0;
920                                 }
921
922                                 for(int i = 0; i < RENDERTARGETS; i++)
923                                 {
924                                         if(draw.renderTarget[i])
925                                         {
926                                                 draw.renderTarget[i]->unlockInternal();
927                                         }
928                                 }
929
930                                 if(draw.depthStencil)
931                                 {
932                                         draw.depthStencil->unlockInternal();
933                                         draw.depthStencil->unlockStencil();
934                                 }
935
936                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
937                                 {
938                                         if(draw.texture[i])
939                                         {
940                                                 draw.texture[i]->unlock();
941                                         }
942                                 }
943
944                                 for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
945                                 {
946                                         if(draw.vertexStream[i])
947                                         {
948                                                 draw.vertexStream[i]->unlock();
949                                         }
950                                 }
951
952                                 if(draw.indexBuffer)
953                                 {
954                                         draw.indexBuffer->unlock();
955                                 }
956
957                                 draw.vertexRoutine->unbind();
958                                 draw.setupRoutine->unbind();
959                                 draw.pixelRoutine->unbind();
960
961                                 sync->unlock();
962
963                                 draw.references = -1;
964                                 resumeApp->signal();
965                         }
966                 }
967
968                 pixelProgress[cluster].executing = false;
969         }
970
971         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
972         {
973                 Triangle *triangle = triangleBatch[unit];
974                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
975                 DrawData *data = draw->data;
976                 VertexTask *task = vertexTask[thread];
977
978                 const void *indices = data->indices;
979                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
980
981                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
982                 {
983                         task->vertexCache.clear();
984                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
985                 }
986
987                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
988
989                 switch(draw->drawType)
990                 {
991                 case DRAW_POINTLIST:
992                         {
993                                 unsigned int index = start;
994
995                                 for(unsigned int i = 0; i < triangleCount; i++)
996                                 {
997                                         batch[i][0] = index;
998                                         batch[i][1] = index;
999                                         batch[i][2] = index;
1000
1001                                         index += 1;
1002                                 }
1003                         }
1004                         break;
1005                 case DRAW_LINELIST:
1006                         {
1007                                 unsigned int index = 2 * start;
1008
1009                                 for(unsigned int i = 0; i < triangleCount; i++)
1010                                 {
1011                                         batch[i][0] = index + 0;
1012                                         batch[i][1] = index + 1;
1013                                         batch[i][2] = index + 1;
1014
1015                                         index += 2;
1016                                 }
1017                         }
1018                         break;
1019                 case DRAW_LINESTRIP:
1020                         {
1021                                 unsigned int index = start;
1022
1023                                 for(unsigned int i = 0; i < triangleCount; i++)
1024                                 {
1025                                         batch[i][0] = index + 0;
1026                                         batch[i][1] = index + 1;
1027                                         batch[i][2] = index + 1;
1028
1029                                         index += 1;
1030                                 }
1031                         }
1032                         break;
1033                 case DRAW_LINELOOP:
1034                         {
1035                                 unsigned int index = start;
1036
1037                                 for(unsigned int i = 0; i < triangleCount; i++)
1038                                 {
1039                                         batch[i][0] = (index + 0) % loop;
1040                                         batch[i][1] = (index + 1) % loop;
1041                                         batch[i][2] = (index + 1) % loop;
1042
1043                                         index += 1;
1044                                 }
1045                         }
1046                         break;
1047                 case DRAW_TRIANGLELIST:
1048                         {
1049                                 unsigned int index = 3 * start;
1050
1051                                 for(unsigned int i = 0; i < triangleCount; i++)
1052                                 {
1053                                         batch[i][0] = index + 0;
1054                                         batch[i][1] = index + 1;
1055                                         batch[i][2] = index + 2;
1056
1057                                         index += 3;
1058                                 }
1059                         }
1060                         break;
1061                 case DRAW_TRIANGLESTRIP:
1062                         {
1063                                 unsigned int index = start;
1064
1065                                 for(unsigned int i = 0; i < triangleCount; i++)
1066                                 {
1067                                         batch[i][0] = index + 0;
1068                                         batch[i][1] = index + (index & 1) + 1;
1069                                         batch[i][2] = index + (~index & 1) + 1;
1070
1071                                         index += 1;
1072                                 }
1073                         }
1074                         break;
1075                 case DRAW_TRIANGLEFAN:
1076                         {
1077                                 unsigned int index = start;
1078
1079                                 for(unsigned int i = 0; i < triangleCount; i++)
1080                                 {
1081                                         batch[i][0] = index + 1;
1082                                         batch[i][1] = index + 2;
1083                                         batch[i][2] = 0;
1084
1085                                         index += 1;
1086                                 }
1087                         }
1088                         break;
1089                 case DRAW_INDEXEDPOINTLIST8:
1090                         {
1091                                 const unsigned char *index = (const unsigned char*)indices + start;
1092
1093                                 for(unsigned int i = 0; i < triangleCount; i++)
1094                                 {
1095                                         batch[i][0] = *index;
1096                                         batch[i][1] = *index;
1097                                         batch[i][2] = *index;
1098
1099                                         index += 1;
1100                                 }
1101                         }
1102                         break;
1103                 case DRAW_INDEXEDPOINTLIST16:
1104                         {
1105                                 const unsigned short *index = (const unsigned short*)indices + start;
1106
1107                                 for(unsigned int i = 0; i < triangleCount; i++)
1108                                 {
1109                                         batch[i][0] = *index;
1110                                         batch[i][1] = *index;
1111                                         batch[i][2] = *index;
1112
1113                                         index += 1;
1114                                 }
1115                         }
1116                         break;
1117                 case DRAW_INDEXEDPOINTLIST32:
1118                         {
1119                                 const unsigned int *index = (const unsigned int*)indices + start;
1120
1121                                 for(unsigned int i = 0; i < triangleCount; i++)
1122                                 {
1123                                         batch[i][0] = *index;
1124                                         batch[i][1] = *index;
1125                                         batch[i][2] = *index;
1126
1127                                         index += 1;
1128                                 }
1129                         }
1130                         break;
1131                 case DRAW_INDEXEDLINELIST8:
1132                         {
1133                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1134
1135                                 for(unsigned int i = 0; i < triangleCount; i++)
1136                                 {
1137                                         batch[i][0] = index[0];
1138                                         batch[i][1] = index[1];
1139                                         batch[i][2] = index[1];
1140
1141                                         index += 2;
1142                                 }
1143                         }
1144                         break;
1145                 case DRAW_INDEXEDLINELIST16:
1146                         {
1147                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1148
1149                                 for(unsigned int i = 0; i < triangleCount; i++)
1150                                 {
1151                                         batch[i][0] = index[0];
1152                                         batch[i][1] = index[1];
1153                                         batch[i][2] = index[1];
1154
1155                                         index += 2;
1156                                 }
1157                         }
1158                         break;
1159                 case DRAW_INDEXEDLINELIST32:
1160                         {
1161                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1162
1163                                 for(unsigned int i = 0; i < triangleCount; i++)
1164                                 {
1165                                         batch[i][0] = index[0];
1166                                         batch[i][1] = index[1];
1167                                         batch[i][2] = index[1];
1168
1169                                         index += 2;
1170                                 }
1171                         }
1172                         break;
1173                 case DRAW_INDEXEDLINESTRIP8:
1174                         {
1175                                 const unsigned char *index = (const unsigned char*)indices + start;
1176
1177                                 for(unsigned int i = 0; i < triangleCount; i++)
1178                                 {
1179                                         batch[i][0] = index[0];
1180                                         batch[i][1] = index[1];
1181                                         batch[i][2] = index[1];
1182
1183                                         index += 1;
1184                                 }
1185                         }
1186                         break;
1187                 case DRAW_INDEXEDLINESTRIP16:
1188                         {
1189                                 const unsigned short *index = (const unsigned short*)indices + start;
1190
1191                                 for(unsigned int i = 0; i < triangleCount; i++)
1192                                 {
1193                                         batch[i][0] = index[0];
1194                                         batch[i][1] = index[1];
1195                                         batch[i][2] = index[1];
1196
1197                                         index += 1;
1198                                 }
1199                         }
1200                         break;
1201                 case DRAW_INDEXEDLINESTRIP32:
1202                         {
1203                                 const unsigned int *index = (const unsigned int*)indices + start;
1204
1205                                 for(unsigned int i = 0; i < triangleCount; i++)
1206                                 {
1207                                         batch[i][0] = index[0];
1208                                         batch[i][1] = index[1];
1209                                         batch[i][2] = index[1];
1210
1211                                         index += 1;
1212                                 }
1213                         }
1214                         break;
1215                 case DRAW_INDEXEDLINELOOP8:
1216                         {
1217                                 const unsigned char *index = (const unsigned char*)indices;
1218
1219                                 for(unsigned int i = 0; i < triangleCount; i++)
1220                                 {
1221                                         batch[i][0] = index[(start + i + 0) % loop];
1222                                         batch[i][1] = index[(start + i + 1) % loop];
1223                                         batch[i][2] = index[(start + i + 1) % loop];
1224                                 }
1225                         }
1226                         break;
1227                 case DRAW_INDEXEDLINELOOP16:
1228                         {
1229                                 const unsigned short *index = (const unsigned short*)indices;
1230
1231                                 for(unsigned int i = 0; i < triangleCount; i++)
1232                                 {
1233                                         batch[i][0] = index[(start + i + 0) % loop];
1234                                         batch[i][1] = index[(start + i + 1) % loop];
1235                                         batch[i][2] = index[(start + i + 1) % loop];
1236                                 }
1237                         }
1238                         break;
1239                 case DRAW_INDEXEDLINELOOP32:
1240                         {
1241                                 const unsigned int *index = (const unsigned int*)indices;
1242
1243                                 for(unsigned int i = 0; i < triangleCount; i++)
1244                                 {
1245                                         batch[i][0] = index[(start + i + 0) % loop];
1246                                         batch[i][1] = index[(start + i + 1) % loop];
1247                                         batch[i][2] = index[(start + i + 1) % loop];
1248                                 }
1249                         }
1250                         break;
1251                 case DRAW_INDEXEDTRIANGLELIST8:
1252                         {
1253                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1254
1255                                 for(unsigned int i = 0; i < triangleCount; i++)
1256                                 {
1257                                         batch[i][0] = index[0];
1258                                         batch[i][1] = index[1];
1259                                         batch[i][2] = index[2];
1260
1261                                         index += 3;
1262                                 }
1263                         }
1264                         break;
1265                 case DRAW_INDEXEDTRIANGLELIST16:
1266                         {
1267                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1268
1269                                 for(unsigned int i = 0; i < triangleCount; i++)
1270                                 {
1271                                         batch[i][0] = index[0];
1272                                         batch[i][1] = index[1];
1273                                         batch[i][2] = index[2];
1274
1275                                         index += 3;
1276                                 }
1277                         }
1278                         break;
1279                 case DRAW_INDEXEDTRIANGLELIST32:
1280                         {
1281                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1282
1283                                 for(unsigned int i = 0; i < triangleCount; i++)
1284                                 {
1285                                         batch[i][0] = index[0];
1286                                         batch[i][1] = index[1];
1287                                         batch[i][2] = index[2];
1288
1289                                         index += 3;
1290                                 }
1291                         }
1292                         break;
1293                 case DRAW_INDEXEDTRIANGLESTRIP8:
1294                         {
1295                                 const unsigned char *index = (const unsigned char*)indices + start;
1296
1297                                 for(unsigned int i = 0; i < triangleCount; i++)
1298                                 {
1299                                         batch[i][0] = index[0];
1300                                         batch[i][1] = index[((start + i) & 1) + 1];
1301                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1302
1303                                         index += 1;
1304                                 }
1305                         }
1306                         break;
1307                 case DRAW_INDEXEDTRIANGLESTRIP16:
1308                         {
1309                                 const unsigned short *index = (const unsigned short*)indices + start;
1310
1311                                 for(unsigned int i = 0; i < triangleCount; i++)
1312                                 {
1313                                         batch[i][0] = index[0];
1314                                         batch[i][1] = index[((start + i) & 1) + 1];
1315                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1316
1317                                         index += 1;
1318                                 }
1319                         }
1320                         break;
1321                 case DRAW_INDEXEDTRIANGLESTRIP32:
1322                         {
1323                                 const unsigned int *index = (const unsigned int*)indices + start;
1324
1325                                 for(unsigned int i = 0; i < triangleCount; i++)
1326                                 {
1327                                         batch[i][0] = index[0];
1328                                         batch[i][1] = index[((start + i) & 1) + 1];
1329                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1330
1331                                         index += 1;
1332                                 }
1333                         }
1334                         break;
1335                 case DRAW_INDEXEDTRIANGLEFAN8:
1336                         {
1337                                 const unsigned char *index = (const unsigned char*)indices;
1338
1339                                 for(unsigned int i = 0; i < triangleCount; i++)
1340                                 {
1341                                         batch[i][0] = index[start + i + 1];
1342                                         batch[i][1] = index[start + i + 2];
1343                                         batch[i][2] = index[0];
1344                                 }
1345                         }
1346                         break;
1347                 case DRAW_INDEXEDTRIANGLEFAN16:
1348                         {
1349                                 const unsigned short *index = (const unsigned short*)indices;
1350
1351                                 for(unsigned int i = 0; i < triangleCount; i++)
1352                                 {
1353                                         batch[i][0] = index[start + i + 1];
1354                                         batch[i][1] = index[start + i + 2];
1355                                         batch[i][2] = index[0];
1356                                 }
1357                         }
1358                         break;
1359                 case DRAW_INDEXEDTRIANGLEFAN32:
1360                         {
1361                                 const unsigned int *index = (const unsigned int*)indices;
1362
1363                                 for(unsigned int i = 0; i < triangleCount; i++)
1364                                 {
1365                                         batch[i][0] = index[start + i + 1];
1366                                         batch[i][1] = index[start + i + 2];
1367                                         batch[i][2] = index[0];
1368                                 }
1369                         }
1370                         break;
1371         case DRAW_QUADLIST:
1372                         {
1373                                 unsigned int index = 4 * start / 2;
1374
1375                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1376                                 {
1377                                         batch[i+0][0] = index + 0;
1378                                         batch[i+0][1] = index + 1;
1379                                         batch[i+0][2] = index + 2;
1380
1381                     batch[i+1][0] = index + 0;
1382                                         batch[i+1][1] = index + 2;
1383                                         batch[i+1][2] = index + 3;
1384
1385                                         index += 4;
1386                                 }
1387                         }
1388                         break;
1389                 default:
1390                         ASSERT(false);
1391                         return;
1392                 }
1393
1394                 task->vertexCount = triangleCount * 3;
1395                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1396         }
1397
1398         int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1399         {
1400                 Triangle *triangle = renderer->triangleBatch[unit];
1401                 Primitive *primitive = renderer->primitiveBatch[unit];
1402
1403                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1404                 SetupProcessor::State &state = draw.setupState;
1405                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1406
1407                 int ms = state.multiSample;
1408                 int pos = state.positionRegister;
1409                 const DrawData *data = draw.data;
1410                 int visible = 0;
1411
1412                 for(int i = 0; i < count; i++, triangle++)
1413                 {
1414                         Vertex &v0 = triangle->v0;
1415                         Vertex &v1 = triangle->v1;
1416                         Vertex &v2 = triangle->v2;
1417
1418                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1419                         {
1420                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1421
1422                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1423
1424                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1425                                 {
1426                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1427                                         {
1428                                                 continue;
1429                                         }
1430                                 }
1431
1432                                 if(setupRoutine(primitive, triangle, &polygon, data))
1433                                 {
1434                                         primitive += ms;
1435                                         visible++;
1436                                 }
1437                         }
1438                 }
1439
1440                 return visible;
1441         }
1442
1443         int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1444         {
1445                 Triangle *triangle = renderer->triangleBatch[unit];
1446                 Primitive *primitive = renderer->primitiveBatch[unit];
1447                 int visible = 0;
1448
1449                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1450                 SetupProcessor::State &state = draw.setupState;
1451                 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1452
1453                 const Vertex &v0 = triangle[0].v0;
1454                 const Vertex &v1 = triangle[0].v1;
1455                 const Vertex &v2 = triangle[0].v2;
1456
1457                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1458
1459                 if(state.cullMode == CULL_CLOCKWISE)
1460                 {
1461                         if(d >= 0) return 0;
1462                 }
1463                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1464                 {
1465                         if(d <= 0) return 0;
1466                 }
1467
1468                 // Copy attributes
1469                 triangle[1].v0 = v1;
1470                 triangle[1].v1 = v2;
1471                 triangle[2].v0 = v2;
1472                 triangle[2].v1 = v0;
1473
1474                 if(state.color[0][0].flat)   // FIXME
1475                 {
1476                         for(int i = 0; i < 2; i++)
1477                         {
1478                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1479                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1480                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1481                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1482                         }
1483                 }
1484
1485                 for(int i = 0; i < 3; i++)
1486                 {
1487                         if(setupLine(renderer, *primitive, *triangle, draw))
1488                         {
1489                                 primitive->area = 0.5f * d;
1490
1491                                 primitive++;
1492                                 visible++;
1493                         }
1494
1495                         triangle++;
1496                 }
1497
1498                 return visible;
1499         }
1500         
1501         int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1502         {
1503                 Triangle *triangle = renderer->triangleBatch[unit];
1504                 Primitive *primitive = renderer->primitiveBatch[unit];
1505                 int visible = 0;
1506
1507                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1508                 SetupProcessor::State &state = draw.setupState;
1509
1510                 const Vertex &v0 = triangle[0].v0;
1511                 const Vertex &v1 = triangle[0].v1;
1512                 const Vertex &v2 = triangle[0].v2;
1513
1514                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1515
1516                 if(state.cullMode == CULL_CLOCKWISE)
1517                 {
1518                         if(d >= 0) return 0;
1519                 }
1520                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1521                 {
1522                         if(d <= 0) return 0;
1523                 }
1524
1525                 // Copy attributes
1526                 triangle[1].v0 = v1;
1527                 triangle[2].v0 = v2;
1528
1529                 for(int i = 0; i < 3; i++)
1530                 {
1531                         if(setupPoint(renderer, *primitive, *triangle, draw))
1532                         {
1533                                 primitive->area = 0.5f * d;
1534
1535                                 primitive++;
1536                                 visible++;
1537                         }
1538
1539                         triangle++;
1540                 }
1541
1542                 return visible;
1543         }
1544
1545         int Renderer::setupLines(Renderer *renderer, int unit, int count)
1546         {
1547                 Triangle *triangle = renderer->triangleBatch[unit];
1548                 Primitive *primitive = renderer->primitiveBatch[unit];
1549                 int visible = 0;
1550
1551                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1552                 SetupProcessor::State &state = draw.setupState;
1553
1554                 int ms = state.multiSample;
1555
1556                 for(int i = 0; i < count; i++)
1557                 {
1558                         if(setupLine(renderer, *primitive, *triangle, draw))
1559                         {
1560                                 primitive += ms;
1561                                 visible++;
1562                         }
1563
1564                         triangle++;
1565                 }
1566
1567                 return visible;
1568         }
1569
1570         int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1571         {
1572                 Triangle *triangle = renderer->triangleBatch[unit];
1573                 Primitive *primitive = renderer->primitiveBatch[unit];
1574                 int visible = 0;
1575
1576                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1577                 SetupProcessor::State &state = draw.setupState;
1578
1579                 int ms = state.multiSample;
1580
1581                 for(int i = 0; i < count; i++)
1582                 {
1583                         if(setupPoint(renderer, *primitive, *triangle, draw))
1584                         {
1585                                 primitive += ms;
1586                                 visible++;
1587                         }
1588
1589                         triangle++;
1590                 }
1591
1592                 return visible;
1593         }
1594
1595         bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1596         {
1597                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1598                 const SetupProcessor::State &state = draw.setupState;
1599                 const DrawData &data = *draw.data;
1600
1601                 float lineWidth = data.lineWidth;
1602
1603                 Vertex &v0 = triangle.v0;
1604                 Vertex &v1 = triangle.v1;
1605
1606                 int pos = state.positionRegister;
1607
1608                 const float4 &P0 = v0.v[pos];
1609                 const float4 &P1 = v1.v[pos];
1610
1611                 if(P0.w <= 0 && P1.w <= 0)
1612                 {
1613                         return false;
1614                 }
1615
1616                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1617                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1618
1619                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1620                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1621
1622                 if(dx == 0 && dy == 0)
1623                 {
1624                         return false;
1625                 }
1626
1627                 if(false)   // Rectangle
1628                 {
1629                         float4 P[4];
1630                         int C[4];
1631
1632                         P[0] = P0;
1633                         P[1] = P1;
1634                         P[2] = P1;
1635                         P[3] = P0;
1636
1637                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1638
1639                         dx *= scale;
1640                         dy *= scale;
1641
1642                         float dx0w = dx * P0.w / W;
1643                         float dy0h = dy * P0.w / H;
1644                         float dx0h = dx * P0.w / H;
1645                         float dy0w = dy * P0.w / W;
1646
1647                         float dx1w = dx * P1.w / W;
1648                         float dy1h = dy * P1.w / H;
1649                         float dx1h = dx * P1.w / H;
1650                         float dy1w = dy * P1.w / W;
1651
1652                         P[0].x += -dy0w + -dx0w;
1653                         P[0].y += -dx0h + +dy0h;
1654                         C[0] = computeClipFlags(P[0], data);
1655
1656                         P[1].x += -dy1w + +dx1w;
1657                         P[1].y += -dx1h + +dy1h;
1658                         C[1] = computeClipFlags(P[1], data);
1659
1660                         P[2].x += +dy1w + +dx1w;
1661                         P[2].y += +dx1h + -dy1h;
1662                         C[2] = computeClipFlags(P[2], data);
1663
1664                         P[3].x += +dy0w + -dx0w;
1665                         P[3].y += +dx0h + +dy0h;
1666                         C[3] = computeClipFlags(P[3], data);
1667
1668                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1669                         {
1670                                 Polygon polygon(P, 4);
1671
1672                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1673
1674                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1675                                 {
1676                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1677                                         {
1678                                                 return false;
1679                                         }
1680                                 }
1681
1682                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1683                         }
1684                 }
1685                 else   // Diamond test convention
1686                 {
1687                         float4 P[8];
1688                         int C[8];
1689
1690                         P[0] = P0;
1691                         P[1] = P0;
1692                         P[2] = P0;
1693                         P[3] = P0;
1694                         P[4] = P1;
1695                         P[5] = P1;
1696                         P[6] = P1;
1697                         P[7] = P1;
1698
1699                         float dx0 = lineWidth * 0.5f * P0.w / W;
1700                         float dy0 = lineWidth * 0.5f * P0.w / H;
1701
1702                         float dx1 = lineWidth * 0.5f * P1.w / W;
1703                         float dy1 = lineWidth * 0.5f * P1.w / H;
1704
1705                         P[0].x += -dx0;
1706                         C[0] = computeClipFlags(P[0], data);
1707
1708                         P[1].y += +dy0;
1709                         C[1] = computeClipFlags(P[1], data);
1710
1711                         P[2].x += +dx0;
1712                         C[2] = computeClipFlags(P[2], data);
1713
1714                         P[3].y += -dy0;
1715                         C[3] = computeClipFlags(P[3], data);
1716
1717                         P[4].x += -dx1;
1718                         C[4] = computeClipFlags(P[4], data);
1719
1720                         P[5].y += +dy1;
1721                         C[5] = computeClipFlags(P[5], data);
1722
1723                         P[6].x += +dx1;
1724                         C[6] = computeClipFlags(P[6], data);
1725
1726                         P[7].y += -dy1;
1727                         C[7] = computeClipFlags(P[7], data);
1728
1729                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1730                         {
1731                                 float4 L[6];
1732
1733                                 if(dx > -dy)
1734                                 {
1735                                         if(dx > dy)   // Right
1736                                         {
1737                                                 L[0] = P[0];
1738                                                 L[1] = P[1];
1739                                                 L[2] = P[5];
1740                                                 L[3] = P[6];
1741                                                 L[4] = P[7];
1742                                                 L[5] = P[3];
1743                                         }
1744                                         else   // Down
1745                                         {
1746                                                 L[0] = P[0];
1747                                                 L[1] = P[4];
1748                                                 L[2] = P[5];
1749                                                 L[3] = P[6];
1750                                                 L[4] = P[2];
1751                                                 L[5] = P[3];
1752                                         }
1753                                 }
1754                                 else
1755                                 {
1756                                         if(dx > dy)   // Up
1757                                         {
1758                                                 L[0] = P[0];
1759                                                 L[1] = P[1];
1760                                                 L[2] = P[2];
1761                                                 L[3] = P[6];
1762                                                 L[4] = P[7];
1763                                                 L[5] = P[4];
1764                                         }
1765                                         else   // Left
1766                                         {
1767                                                 L[0] = P[1];
1768                                                 L[1] = P[2];
1769                                                 L[2] = P[3];
1770                                                 L[3] = P[7];
1771                                                 L[4] = P[4];
1772                                                 L[5] = P[5];
1773                                         }
1774                                 }
1775
1776                                 Polygon polygon(L, 6);
1777
1778                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1779
1780                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1781                                 {
1782                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1783                                         {
1784                                                 return false;
1785                                         }
1786                                 }
1787
1788                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1789                         }
1790                 }
1791
1792                 return false;
1793         }
1794
1795         bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1796         {
1797                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1798                 const SetupProcessor::State &state = draw.setupState;
1799                 const DrawData &data = *draw.data;
1800
1801                 Vertex &v = triangle.v0;
1802
1803                 float pSize;
1804
1805                 int pts = state.pointSizeRegister;
1806
1807                 if(state.pointSizeRegister != 0xF)
1808                 {
1809                         pSize = v.v[pts].y;
1810                 }
1811                 else
1812                 {
1813                         pSize = data.point.pointSize[0];
1814                 }
1815
1816                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1817
1818                 float4 P[4];
1819                 int C[4];
1820
1821                 int pos = state.positionRegister;
1822
1823                 P[0] = v.v[pos];
1824                 P[1] = v.v[pos];
1825                 P[2] = v.v[pos];
1826                 P[3] = v.v[pos];
1827
1828                 const float X = pSize * P[0].w * data.halfPixelX[0];
1829                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1830
1831                 P[0].x -= X;
1832                 P[0].y += Y;
1833                 C[0] = computeClipFlags(P[0], data);
1834
1835                 P[1].x += X;
1836                 P[1].y += Y;
1837                 C[1] = computeClipFlags(P[1], data);
1838
1839                 P[2].x += X;
1840                 P[2].y -= Y;
1841                 C[2] = computeClipFlags(P[2], data);
1842
1843                 P[3].x -= X;
1844                 P[3].y -= Y;
1845                 C[3] = computeClipFlags(P[3], data);
1846
1847                 triangle.v1 = triangle.v0;
1848                 triangle.v2 = triangle.v0;
1849
1850                 triangle.v1.X += iround(16 * 0.5f * pSize);
1851                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1852
1853                 Polygon polygon(P, 4);
1854
1855                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1856                 {
1857                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1858
1859                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1860                         {
1861                                 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1862                                 {
1863                                         return false;
1864                                 }
1865                         }
1866                         
1867                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1868                 }
1869
1870                 return false;
1871         }
1872
1873         unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1874         {
1875                 return ((v.x > v.w)  << 0) |
1876                            ((v.y > v.w)  << 1) |
1877                            ((v.z > v.w)  << 2) |
1878                            ((v.x < -v.w) << 3) |
1879                        ((v.y < -v.w) << 4) |
1880                            ((v.z < 0)    << 5) |
1881                            Clipper::CLIP_FINITE;   // FIXME: xyz finite
1882         }
1883
1884         void Renderer::initializeThreads()
1885         {
1886                 unitCount = ceilPow2(threadCount);
1887                 clusterCount = ceilPow2(threadCount);
1888
1889                 for(int i = 0; i < unitCount; i++)
1890                 {
1891                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1892                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1893                 }
1894
1895                 for(int i = 0; i < threadCount; i++)
1896                 {
1897                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1898                         vertexTask[i]->vertexCache.drawCall = -1;
1899
1900                         task[i].type = Task::SUSPEND;
1901
1902                         resume[i] = new Event();
1903                         suspend[i] = new Event();
1904
1905                         Parameters parameters;
1906                         parameters.threadIndex = i;
1907                         parameters.renderer = this;
1908
1909                         exitThreads = false;
1910                         worker[i] = new Thread(threadFunction, &parameters);
1911
1912                         suspend[i]->wait();
1913                         suspend[i]->signal();
1914                 }
1915         }
1916
1917         void Renderer::terminateThreads()
1918         {
1919                 while(threadsAwake != 0)
1920                 {
1921                         Thread::sleep(1);
1922                 }
1923
1924                 for(int thread = 0; thread < threadCount; thread++)
1925                 {
1926                         if(worker[thread])
1927                         {
1928                                 exitThreads = true;
1929                                 resume[thread]->signal();
1930                                 worker[thread]->join();
1931                                 
1932                                 delete worker[thread];
1933                                 worker[thread] = 0;
1934                                 delete resume[thread];
1935                                 resume[thread] = 0;
1936                                 delete suspend[thread];
1937                                 suspend[thread] = 0;
1938                         }
1939                 
1940                         deallocate(vertexTask[thread]);
1941                         vertexTask[thread] = 0;
1942                 }
1943
1944                 for(int i = 0; i < 16; i++)
1945                 {
1946                         deallocate(triangleBatch[i]);
1947                         triangleBatch[i] = 0;
1948
1949                         deallocate(primitiveBatch[i]);
1950                         primitiveBatch[i] = 0;
1951                 }
1952         }
1953
1954         void Renderer::loadConstants(const VertexShader *vertexShader)
1955         {
1956                 if(!vertexShader) return;
1957
1958                 size_t count = vertexShader->getLength();
1959
1960                 for(size_t i = 0; i < count; i++)
1961                 {
1962                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
1963
1964                         if(instruction->opcode == Shader::OPCODE_DEF)
1965                         {
1966                                 int index = instruction->dst.index;
1967                                 float value[4];
1968
1969                                 value[0] = instruction->src[0].value[0];
1970                                 value[1] = instruction->src[0].value[1];
1971                                 value[2] = instruction->src[0].value[2];
1972                                 value[3] = instruction->src[0].value[3];
1973
1974                                 setVertexShaderConstantF(index, value);
1975                         }
1976                         else if(instruction->opcode == Shader::OPCODE_DEFI)
1977                         {
1978                                 int index = instruction->dst.index;
1979                                 int integer[4];
1980
1981                                 integer[0] = instruction->src[0].integer[0];
1982                                 integer[1] = instruction->src[0].integer[1];
1983                                 integer[2] = instruction->src[0].integer[2];
1984                                 integer[3] = instruction->src[0].integer[3];
1985
1986                                 setVertexShaderConstantI(index, integer);
1987                         }
1988                         else if(instruction->opcode == Shader::OPCODE_DEFB)
1989                         {
1990                                 int index = instruction->dst.index;
1991                                 int boolean = instruction->src[0].boolean[0];
1992
1993                                 setVertexShaderConstantB(index, &boolean);
1994                         }
1995                 }
1996         }
1997
1998         void Renderer::loadConstants(const PixelShader *pixelShader)
1999         {
2000                 if(!pixelShader) return;
2001
2002                 size_t count = pixelShader->getLength();
2003
2004                 for(size_t i = 0; i < count; i++)
2005                 {
2006                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2007
2008                         if(instruction->opcode == Shader::OPCODE_DEF)
2009                         {
2010                                 int index = instruction->dst.index;
2011                                 float value[4];
2012
2013                                 value[0] = instruction->src[0].value[0];
2014                                 value[1] = instruction->src[0].value[1];
2015                                 value[2] = instruction->src[0].value[2];
2016                                 value[3] = instruction->src[0].value[3];
2017
2018                                 setPixelShaderConstantF(index, value);
2019                         }
2020                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2021                         {
2022                                 int index = instruction->dst.index;
2023                                 int integer[4];
2024
2025                                 integer[0] = instruction->src[0].integer[0];
2026                                 integer[1] = instruction->src[0].integer[1];
2027                                 integer[2] = instruction->src[0].integer[2];
2028                                 integer[3] = instruction->src[0].integer[3];
2029
2030                                 setPixelShaderConstantI(index, integer);
2031                         }
2032                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2033                         {
2034                                 int index = instruction->dst.index;
2035                                 int boolean = instruction->src[0].boolean[0];
2036
2037                                 setPixelShaderConstantB(index, &boolean);
2038                         }
2039                 }
2040         }
2041
2042         void Renderer::setIndexBuffer(Resource *indexBuffer)
2043         {
2044                 context->indexBuffer = indexBuffer;
2045         }
2046
2047         void Renderer::setMultiSampleMask(unsigned int mask)
2048         {
2049                 context->sampleMask = mask;
2050         }
2051
2052         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2053         {
2054                 sw::transparencyAntialiasing = transparencyAntialiasing;
2055         }
2056
2057         bool Renderer::isReadWriteTexture(int sampler)
2058         {
2059                 for(int index = 0; index < RENDERTARGETS; index++)
2060                 {
2061                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2062                         {
2063                                 return true;
2064                         }
2065                 }
2066         
2067                 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
2068                 {
2069                         return true;
2070                 }
2071
2072                 return false;
2073         }
2074         
2075         void Renderer::updateClipper()
2076         {
2077                 if(updateClipPlanes)
2078                 {
2079                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2080                         {
2081                                 const Matrix &scissorWorld = getViewTransform();
2082
2083                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2084                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2085                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2086                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2087                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2088                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2089                         }
2090                         else   // User plane in clip space
2091                         {
2092                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2093                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2094                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2095                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2096                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2097                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2098                         }
2099
2100                         updateClipPlanes = false;
2101                 }
2102         }
2103
2104         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2105         {
2106                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2107
2108                 context->texture[sampler] = resource;
2109         }
2110
2111         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2112         {
2113                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2114                 
2115                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2116         }
2117
2118         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2119         {
2120                 if(type == SAMPLER_PIXEL)
2121                 {
2122                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2123                 }
2124                 else
2125                 {
2126                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2127                 }
2128         }
2129
2130         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2131         {
2132                 if(type == SAMPLER_PIXEL)
2133                 {
2134                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2135                 }
2136                 else
2137                 {
2138                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2139                 }
2140         }
2141
2142         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2143         {
2144                 if(type == SAMPLER_PIXEL)
2145                 {
2146                         PixelProcessor::setGatherEnable(sampler, enable);
2147                 }
2148                 else
2149                 {
2150                         VertexProcessor::setGatherEnable(sampler, enable);
2151                 }
2152         }
2153
2154         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2155         {
2156                 if(type == SAMPLER_PIXEL)
2157                 {
2158                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2159                 }
2160                 else
2161                 {
2162                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2163                 }
2164         }
2165
2166         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2167         {
2168                 if(type == SAMPLER_PIXEL)
2169                 {
2170                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2171                 }
2172                 else
2173                 {
2174                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2175                 }
2176         }
2177
2178         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2179         {
2180                 if(type == SAMPLER_PIXEL)
2181                 {
2182                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2183                 }
2184                 else
2185                 {
2186                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2187                 }
2188         }
2189
2190         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2191         {
2192                 if(type == SAMPLER_PIXEL)
2193                 {
2194                         PixelProcessor::setReadSRGB(sampler, sRGB);
2195                 }
2196                 else
2197                 {
2198                         VertexProcessor::setReadSRGB(sampler, sRGB);
2199                 }
2200         }
2201
2202         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2203         {
2204                 if(type == SAMPLER_PIXEL)
2205                 {
2206                         PixelProcessor::setMipmapLOD(sampler, bias);
2207                 }
2208                 else
2209                 {
2210                         VertexProcessor::setMipmapLOD(sampler, bias);
2211                 }
2212         }
2213
2214         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2215         {
2216                 if(type == SAMPLER_PIXEL)
2217                 {
2218                         PixelProcessor::setBorderColor(sampler, borderColor);
2219                 }
2220                 else
2221                 {
2222                         VertexProcessor::setBorderColor(sampler, borderColor);
2223                 }
2224         }
2225
2226         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2227         {
2228                 if(type == SAMPLER_PIXEL)
2229                 {
2230                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2231                 }
2232                 else
2233                 {
2234                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2235                 }
2236         }
2237
2238         void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2239         {
2240                 if(type == SAMPLER_PIXEL)
2241                 {
2242                         PixelProcessor::setSwizzleR(sampler, swizzleR);
2243                 }
2244                 else
2245                 {
2246                         VertexProcessor::setSwizzleR(sampler, swizzleR);
2247                 }
2248         }
2249
2250         void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2251         {
2252                 if(type == SAMPLER_PIXEL)
2253                 {
2254                         PixelProcessor::setSwizzleG(sampler, swizzleG);
2255                 }
2256                 else
2257                 {
2258                         VertexProcessor::setSwizzleG(sampler, swizzleG);
2259                 }
2260         }
2261
2262         void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2263         {
2264                 if(type == SAMPLER_PIXEL)
2265                 {
2266                         PixelProcessor::setSwizzleB(sampler, swizzleB);
2267                 }
2268                 else
2269                 {
2270                         VertexProcessor::setSwizzleB(sampler, swizzleB);
2271                 }
2272         }
2273
2274         void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2275         {
2276                 if(type == SAMPLER_PIXEL)
2277                 {
2278                         PixelProcessor::setSwizzleA(sampler, swizzleA);
2279                 }
2280                 else
2281                 {
2282                         VertexProcessor::setSwizzleA(sampler, swizzleA);
2283                 }
2284         }
2285
2286         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2287         {
2288                 context->setPointSpriteEnable(pointSpriteEnable);
2289         }
2290
2291         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2292         {
2293                 context->setPointScaleEnable(pointScaleEnable);
2294         }
2295
2296         void Renderer::setLineWidth(float width)
2297         {
2298                 context->lineWidth = width;
2299         }
2300
2301         void Renderer::setDepthBias(float bias)
2302         {
2303                 depthBias = bias;
2304         }
2305
2306         void Renderer::setSlopeDepthBias(float slopeBias)
2307         {
2308                 slopeDepthBias = slopeBias;
2309         }
2310
2311         void Renderer::setPixelShader(const PixelShader *shader)
2312         {
2313                 context->pixelShader = shader;
2314
2315                 loadConstants(shader);
2316         }
2317
2318         void Renderer::setVertexShader(const VertexShader *shader)
2319         {
2320                 context->vertexShader = shader;
2321
2322                 loadConstants(shader);
2323         }
2324
2325         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2326         {
2327                 for(int i = 0; i < DRAW_COUNT; i++)
2328                 {
2329                         if(drawCall[i]->psDirtyConstF < index + count)
2330                         {
2331                                 drawCall[i]->psDirtyConstF = index + count;
2332                         }
2333                 }
2334
2335                 for(int i = 0; i < count; i++)
2336                 {
2337                         PixelProcessor::setFloatConstant(index + i, value);
2338                         value += 4;
2339                 }
2340         }
2341
2342         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2343         {
2344                 for(int i = 0; i < DRAW_COUNT; i++)
2345                 {
2346                         if(drawCall[i]->psDirtyConstI < index + count)
2347                         {
2348                                 drawCall[i]->psDirtyConstI = index + count;
2349                         }
2350                 }
2351
2352                 for(int i = 0; i < count; i++)
2353                 {
2354                         PixelProcessor::setIntegerConstant(index + i, value);
2355                         value += 4;
2356                 }
2357         }
2358
2359         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2360         {
2361                 for(int i = 0; i < DRAW_COUNT; i++)
2362                 {
2363                         if(drawCall[i]->psDirtyConstB < index + count)
2364                         {
2365                                 drawCall[i]->psDirtyConstB = index + count;
2366                         }
2367                 }
2368
2369                 for(int i = 0; i < count; i++)
2370                 {
2371                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2372                         boolean++;
2373                 }
2374         }
2375
2376         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2377         {
2378                 for(int i = 0; i < DRAW_COUNT; i++)
2379                 {
2380                         if(drawCall[i]->vsDirtyConstF < index + count)
2381                         {
2382                                 drawCall[i]->vsDirtyConstF = index + count;
2383                         }
2384                 }
2385
2386                 for(int i = 0; i < count; i++)
2387                 {
2388                         VertexProcessor::setFloatConstant(index + i, value);
2389                         value += 4;
2390                 }
2391         }
2392
2393         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2394         {
2395                 for(int i = 0; i < DRAW_COUNT; i++)
2396                 {
2397                         if(drawCall[i]->vsDirtyConstI < index + count)
2398                         {
2399                                 drawCall[i]->vsDirtyConstI = index + count;
2400                         }
2401                 }
2402
2403                 for(int i = 0; i < count; i++)
2404                 {
2405                         VertexProcessor::setIntegerConstant(index + i, value);
2406                         value += 4;
2407                 }
2408         }
2409
2410         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2411         {
2412                 for(int i = 0; i < DRAW_COUNT; i++)
2413                 {
2414                         if(drawCall[i]->vsDirtyConstB < index + count)
2415                         {
2416                                 drawCall[i]->vsDirtyConstB = index + count;
2417                         }
2418                 }
2419
2420                 for(int i = 0; i < count; i++)
2421                 {
2422                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2423                         boolean++;
2424                 }
2425         }
2426
2427         void Renderer::setModelMatrix(const Matrix &M, int i)
2428         {
2429                 VertexProcessor::setModelMatrix(M, i);
2430         }
2431
2432         void Renderer::setViewMatrix(const Matrix &V)
2433         {
2434                 VertexProcessor::setViewMatrix(V);
2435                 updateClipPlanes = true;
2436         }
2437
2438         void Renderer::setBaseMatrix(const Matrix &B)
2439         {
2440                 VertexProcessor::setBaseMatrix(B);
2441                 updateClipPlanes = true;
2442         }
2443
2444         void Renderer::setProjectionMatrix(const Matrix &P)
2445         {
2446                 VertexProcessor::setProjectionMatrix(P);
2447                 updateClipPlanes = true;
2448         }
2449
2450         void Renderer::addQuery(Query *query)
2451         {
2452                 queries.push_back(query);
2453         }
2454         
2455         void Renderer::removeQuery(Query *query)
2456         {
2457                 queries.remove(query);
2458         }
2459
2460         #if PERF_HUD
2461                 int Renderer::getThreadCount()
2462                 {
2463                         return threadCount;
2464                 }
2465                 
2466                 int64_t Renderer::getVertexTime(int thread)
2467                 {
2468                         return vertexTime[thread];
2469                 }
2470
2471                 int64_t Renderer::getSetupTime(int thread)
2472                 {
2473                         return setupTime[thread];
2474                 }
2475                         
2476                 int64_t Renderer::getPixelTime(int thread)
2477                 {
2478                         return pixelTime[thread];
2479                 }
2480
2481                 void Renderer::resetTimers()
2482                 {
2483                         for(int thread = 0; thread < threadCount; thread++)
2484                         {
2485                                 vertexTime[thread] = 0;
2486                                 setupTime[thread] = 0;
2487                                 pixelTime[thread] = 0;
2488                         }
2489                 }
2490         #endif
2491
2492         void Renderer::setViewport(const Viewport &viewport)
2493         {
2494                 this->viewport = viewport;
2495         }
2496
2497         void Renderer::setScissor(const Rect &scissor)
2498         {
2499                 this->scissor = scissor;
2500         }
2501
2502         void Renderer::setClipFlags(int flags)
2503         {
2504                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2505         }
2506
2507         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2508         {
2509                 if(index < MAX_CLIP_PLANES)
2510                 {
2511                         userPlane[index] = plane;
2512                 }
2513                 else ASSERT(false);
2514
2515                 updateClipPlanes = true;
2516         }
2517
2518         void Renderer::updateConfiguration(bool initialUpdate)
2519         {
2520                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2521
2522                 if(newConfiguration || initialUpdate)
2523                 {
2524                         terminateThreads();
2525
2526                         SwiftConfig::Configuration configuration = {};
2527                         swiftConfig->getConfiguration(configuration);
2528
2529                         precacheVertex = !newConfiguration && configuration.precache;
2530                         precacheSetup = !newConfiguration && configuration.precache;
2531                         precachePixel = !newConfiguration && configuration.precache;
2532
2533                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2534                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2535                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2536
2537                         switch(configuration.textureSampleQuality)
2538                         {
2539                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2540                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2541                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2542                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2543                         }
2544
2545                         switch(configuration.mipmapQuality)
2546                         {
2547                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2548                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2549                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2550                         }
2551
2552                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2553
2554                         switch(configuration.transcendentalPrecision)
2555                         {
2556                         case 0:
2557                                 logPrecision = APPROXIMATE;
2558                                 expPrecision = APPROXIMATE;
2559                                 rcpPrecision = APPROXIMATE;
2560                                 rsqPrecision = APPROXIMATE;
2561                                 break;
2562                         case 1:
2563                                 logPrecision = PARTIAL;
2564                                 expPrecision = PARTIAL;
2565                                 rcpPrecision = PARTIAL;
2566                                 rsqPrecision = PARTIAL;
2567                                 break;
2568                         case 2:
2569                                 logPrecision = ACCURATE;
2570                                 expPrecision = ACCURATE;
2571                                 rcpPrecision = ACCURATE;
2572                                 rsqPrecision = ACCURATE;
2573                                 break;
2574                         case 3:
2575                                 logPrecision = WHQL;
2576                                 expPrecision = WHQL;
2577                                 rcpPrecision = WHQL;
2578                                 rsqPrecision = WHQL;
2579                                 break;
2580                         case 4:
2581                                 logPrecision = IEEE;
2582                                 expPrecision = IEEE;
2583                                 rcpPrecision = IEEE;
2584                                 rsqPrecision = IEEE;
2585                                 break;
2586                         default:
2587                                 logPrecision = ACCURATE;
2588                                 expPrecision = ACCURATE;
2589                                 rcpPrecision = ACCURATE;
2590                                 rsqPrecision = ACCURATE;
2591                                 break;
2592                         }
2593
2594                         switch(configuration.transparencyAntialiasing)
2595                         {
2596                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2597                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2598                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2599                         }
2600
2601                         switch(configuration.threadCount)
2602                         {
2603                         case -1: threadCount = CPUID::coreCount();        break;
2604                         case 0:  threadCount = CPUID::processAffinity();  break;
2605                         default: threadCount = configuration.threadCount; break;
2606                         }
2607
2608                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2609                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2610                         CPUID::setEnableSSE3(configuration.enableSSE3);
2611                         CPUID::setEnableSSE2(configuration.enableSSE2);
2612                         CPUID::setEnableSSE(configuration.enableSSE);
2613
2614                         for(int pass = 0; pass < 10; pass++)
2615                         {
2616                                 optimization[pass] = configuration.optimization[pass];
2617                         }
2618
2619                         forceWindowed = configuration.forceWindowed;
2620                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2621                         postBlendSRGB = configuration.postBlendSRGB;
2622                         exactColorRounding = configuration.exactColorRounding;
2623                         forceClearRegisters = configuration.forceClearRegisters;
2624
2625                 #ifndef NDEBUG
2626                         minPrimitives = configuration.minPrimitives;
2627                         maxPrimitives = configuration.maxPrimitives;
2628                 #endif
2629                 }
2630
2631                 if(!initialUpdate && !worker[0])
2632                 {
2633                         initializeThreads();
2634                 }
2635         }
2636 }