OSDN Git Service

Use the main thread for draw execution when single-threaded.
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2012 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "Renderer.hpp"
13
14 #include "Clipper.hpp"
15 #include "Math.hpp"
16 #include "FrameBuffer.hpp"
17 #include "Timer.hpp"
18 #include "Surface.hpp"
19 #include "Half.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
24 #include "CPUID.hpp"
25 #include "Memory.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
28 #include "Debug.hpp"
29 #include "Reactor/Reactor.hpp"
30
31 #include <malloc.h>
32
33 #undef max
34
35 bool disableServer = true;   // Handed to the SwiftConfig constructor in Renderer::Renderer()
36
37 #ifndef NDEBUG
38 unsigned int minPrimitives = 1;         // Debug builds only: Renderer::draw() returns early when count is below this
39 unsigned int maxPrimitives = 1 << 21;   // Debug builds only: Renderer::draw() returns early when count is above this
40 #endif
41
42 namespace sw
43 {
44         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
45         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
46         extern bool booleanFaceRegister;        // Copied from the Conventions argument of the Renderer constructor
47         extern bool fullPixelPositionRegister;  // Copied from the Conventions argument of the Renderer constructor
48         extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
49         extern bool secondaryColor;             // Specular lighting is applied after texturing
50
51         extern bool forceWindowed;
52         extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near
53         extern bool postBlendSRGB;
54         extern bool exactColorRounding;         // Overwritten with the Renderer constructor's argument of the same name
55         extern TransparencyAntialiasing transparencyAntialiasing;
56         extern bool forceClearRegisters;
57
58         extern bool precacheVertex;
59         extern bool precacheSetup;
60         extern bool precachePixel;
61
62         int batchSize = 128;    // Renderer::draw() divides this by the multisample count to get the primitives per batch
63         int threadCount = 1;    // Renderer::draw() executes tasks on the calling thread unless this is greater than 1
64         int unitCount = 1;      // Number of primitive units scanned by Renderer::findAvailableTasks()
65         int clusterCount = 1;   // Number of pixel clusters iterated by Renderer::draw() and findAvailableTasks()
66
67         TranscendentalPrecision logPrecision = ACCURATE;   // Renderer::threadFunction() enables FTZ/DAZ when this is below IEEE
68         TranscendentalPrecision expPrecision = ACCURATE;
69         TranscendentalPrecision rcpPrecision = ACCURATE;
70         TranscendentalPrecision rsqPrecision = ACCURATE;
71         bool perspectiveCorrection = true;
72
73         struct Parameters   // Argument block unpacked by Renderer::threadFunction()
74         {
75                 Renderer *renderer;   // Renderer whose threadLoop() is run
76                 int threadIndex;      // Index handed to threadLoop()
77         };
78
79         DrawCall::DrawCall()
80         {
81                 queries = 0;
82
83                 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
84                 vsDirtyConstI = 16;
85                 vsDirtyConstB = 16;
86
87                 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
88                 psDirtyConstI = 16;
89                 psDirtyConstB = 16;
90
91                 references = -1;
92
93                 data = (DrawData*)allocate(sizeof(DrawData));
94                 data->constants = &constants;
95         }
96
97         DrawCall::~DrawCall()
98         {
99                 delete queries;
100
101                 deallocate(data);
102         }
103
104         Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : context(context), VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), viewport()
105         {
106                 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
107                 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
108                 sw::booleanFaceRegister = conventions.booleanFaceRegister;
109                 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
110                 sw::leadingVertexFirst = conventions.leadingVertexFirst;
111                 sw::secondaryColor = conventions.secondaryColor;
112                 sw::exactColorRounding = exactColorRounding;
113
114                 setRenderTarget(0, 0);
115                 clipper = new Clipper();
116
117                 updateViewMatrix = true;
118                 updateBaseMatrix = true;
119                 updateProjectionMatrix = true;
120                 updateClipPlanes = true;
121
122                 #if PERF_HUD
123                         resetTimers();
124                 #endif
125
126                 for(int i = 0; i < 16; i++)
127                 {
128                         vertexTask[i] = 0;
129
130                         worker[i] = 0;
131                         resume[i] = 0;
132                         suspend[i] = 0;
133                 }
134
135                 threadsAwake = 0;
136                 resumeApp = new Event();
137
138                 currentDraw = 0;
139                 nextDraw = 0;
140
141                 qHead = 0;
142                 qSize = 0;
143
144                 for(int i = 0; i < 16; i++)
145                 {
146                         triangleBatch[i] = 0;
147                         primitiveBatch[i] = 0;
148                 }
149
150                 for(int draw = 0; draw < DRAW_COUNT; draw++)
151                 {
152                         drawCall[draw] = new DrawCall();
153                         drawList[draw] = drawCall[draw];
154                 }
155
156                 for(int unit = 0; unit < 16; unit++)
157                 {
158                         primitiveProgress[unit].init();
159                 }
160
161                 for(int cluster = 0; cluster < 16; cluster++)
162                 {
163                         pixelProgress[cluster].init();
164                 }
165
166                 clipFlags = 0;
167
168                 swiftConfig = new SwiftConfig(disableServer);
169                 updateConfiguration(true);
170
171                 sync = new Resource(0);
172         }
173
174         Renderer::~Renderer()
175         {
176                 sync->destruct();
177
178                 delete clipper;
179                 clipper = 0;
180
181                 terminateThreads();
182                 delete resumeApp;
183
184                 for(int draw = 0; draw < DRAW_COUNT; draw++)
185                 {
186                         delete drawCall[draw];
187                 }
188
189                 delete swiftConfig;
190         }
191
192         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)   // Thin wrapper around blitter.blit()
193         {
194                 blitter.blit(source, sRect, dest, dRect, filter);   // All arguments are forwarded unchanged
195         }
196
197         void Renderer::blit3D(Surface *source, Surface *dest)   // Thin wrapper around blitter.blit3D()
198         {
199                 blitter.blit3D(source, dest);   // Both surfaces are forwarded unchanged
200         }
201
202         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
203         {
204                 #ifndef NDEBUG
205                         if(count < minPrimitives || count > maxPrimitives)
206                         {
207                                 return;
208                         }
209                 #endif
210
211                 context->drawType = drawType;
212
213                 updateConfiguration();
214                 updateClipper();
215
216                 int ss = context->getSuperSampleCount();
217                 int ms = context->getMultiSampleCount();
218
219                 for(int q = 0; q < ss; q++)
220                 {
221                         int oldMultiSampleMask = context->multiSampleMask;
222                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
223
224                         if(!context->multiSampleMask)
225                         {
226                                 continue;
227                         }
228
229                         sync->lock(sw::PRIVATE);
230
231                         Routine *vertexRoutine;
232                         Routine *setupRoutine;
233                         Routine *pixelRoutine;
234
235                         if(update || oldMultiSampleMask != context->multiSampleMask)
236                         {
237                                 vertexState = VertexProcessor::update();
238                                 setupState = SetupProcessor::update();
239                                 pixelState = PixelProcessor::update();
240
241                                 vertexRoutine = VertexProcessor::routine(vertexState);
242                                 setupRoutine = SetupProcessor::routine(setupState);
243                                 pixelRoutine = PixelProcessor::routine(pixelState);
244                         }
245
246                         int batch = batchSize / ms;
247
248                         int (*setupPrimitives)(Renderer *renderer, int batch, int count);
249
250                         if(context->isDrawTriangle())
251                         {
252                                 switch(context->fillMode)
253                                 {
254                                 case FILL_SOLID:
255                                         setupPrimitives = setupSolidTriangles;
256                                         break;
257                                 case FILL_WIREFRAME:
258                                         setupPrimitives = setupWireframeTriangle;
259                                         batch = 1;
260                                         break;
261                                 case FILL_VERTEX:
262                                         setupPrimitives = setupVertexTriangle;
263                                         batch = 1;
264                                         break;
265                                 default: ASSERT(false);
266                                 }
267                         }
268                         else if(context->isDrawLine())
269                         {
270                                 setupPrimitives = setupLines;
271                         }
272                         else   // Point draw
273                         {
274                                 setupPrimitives = setupPoints;
275                         }
276
277                         DrawCall *draw = 0;
278
279                         do
280                         {
281                                 for(int i = 0; i < DRAW_COUNT; i++)
282                                 {
283                                         if(drawCall[i]->references == -1)
284                                         {
285                                                 draw = drawCall[i];
286                                                 drawList[nextDraw % DRAW_COUNT] = draw;
287
288                                                 break;
289                                         }
290                                 }
291
292                                 if(!draw)
293                                 {
294                                         resumeApp->wait();
295                                 }
296                         }
297                         while(!draw);
298
299                         DrawData *data = draw->data;
300
301                         if(queries.size() != 0)
302                         {
303                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
304                                 {
305                                         atomicIncrement(&(*query)->reference);
306                                 }
307
308                                 draw->queries = new std::list<Query*>(queries);
309                         }
310
311                         draw->drawType = drawType;
312                         draw->batchSize = batch;
313
314                         vertexRoutine->bind();
315                         setupRoutine->bind();
316                         pixelRoutine->bind();
317
318                         draw->vertexRoutine = vertexRoutine;
319                         draw->setupRoutine = setupRoutine;
320                         draw->pixelRoutine = pixelRoutine;
321                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
322                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
323                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
324                         draw->setupPrimitives = setupPrimitives;
325                         draw->setupState = setupState;
326
327                         for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
328                         {
329                                 draw->vertexStream[i] = context->input[i].resource;
330                                 data->input[i] = context->input[i].buffer;
331                                 data->stride[i] = context->input[i].stride;
332
333                                 if(draw->vertexStream[i])
334                                 {
335                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
336                                 }
337                         }
338
339                         if(context->indexBuffer)
340                         {
341                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
342                         }
343
344                         draw->indexBuffer = context->indexBuffer;
345
346                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
347                         {
348                                 draw->texture[sampler] = 0;
349                         }
350
351                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
352                         {
353                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
354                                 {
355                                         draw->texture[sampler] = context->texture[sampler];
356                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
357
358                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
359                                 }
360                         }
361
362                         if(context->pixelShader)
363                         {
364                                 if(draw->psDirtyConstF)
365                                 {
366                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
367                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
368                                         draw->psDirtyConstF = 0;
369                                 }
370
371                                 if(draw->psDirtyConstI)
372                                 {
373                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
374                                         draw->psDirtyConstI = 0;
375                                 }
376
377                                 if(draw->psDirtyConstB)
378                                 {
379                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
380                                         draw->psDirtyConstB = 0;
381                                 }
382                         }
383                         
384                         if(context->pixelShaderVersion() <= 0x0104)
385                         {
386                                 for(int stage = 0; stage < 8; stage++)
387                                 {
388                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
389                                         {
390                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
391                                         }
392                                         else break;
393                                 }
394                         }
395
396                         if(context->vertexShader)
397                         {
398                                 if(context->vertexShader->getVersion() >= 0x0300)
399                                 {
400                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
401                                         {
402                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
403                                                 {
404                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
405                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
406
407                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
408                                                 }
409                                         }
410                                 }
411
412                                 if(draw->vsDirtyConstF)
413                                 {
414                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
415                                         draw->vsDirtyConstF = 0;
416                                 }
417
418                                 if(draw->vsDirtyConstI)
419                                 {
420                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
421                                         draw->vsDirtyConstI = 0;
422                                 }
423
424                                 if(draw->vsDirtyConstB)
425                                 {
426                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
427                                         draw->vsDirtyConstB = 0;
428                                 }
429
430                                 if(context->vertexShader->instanceIdDeclared)
431                                 {
432                                         data->instanceID = context->instanceID;
433                                 }
434                         }
435                         else
436                         {
437                                 data->ff = ff;
438
439                                 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
440                                 draw->vsDirtyConstI = 16;
441                                 draw->vsDirtyConstB = 16;
442                         }
443
444                         if(pixelState.stencilActive)
445                         {
446                                 data->stencil[0] = stencil;
447                                 data->stencil[1] = stencilCCW;
448                         }
449
450                         if(pixelState.fogActive)
451                         {
452                                 data->fog = fog;
453                         }
454
455                         if(setupState.isDrawPoint)
456                         {
457                                 data->point = point;
458                         }
459
460                         data->lineWidth = context->lineWidth;
461
462                         data->factor = factor;
463
464                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
465                         {
466                                 float ref = context->alphaReference * (1.0f / 255.0f);
467                                 float margin = sw::min(ref, 1.0f - ref);
468
469                                 if(ms == 4)
470                                 {
471                                         data->a2c0 = replicate(ref - margin * 0.6f);
472                                         data->a2c1 = replicate(ref - margin * 0.2f);
473                                         data->a2c2 = replicate(ref + margin * 0.2f);
474                                         data->a2c3 = replicate(ref + margin * 0.6f);
475                                 }
476                                 else if(ms == 2)
477                                 {
478                                         data->a2c0 = replicate(ref - margin * 0.3f);
479                                         data->a2c1 = replicate(ref + margin * 0.3f);
480                                 }
481                                 else ASSERT(false);
482                         }
483
484                         if(pixelState.occlusionEnabled)
485                         {
486                                 for(int cluster = 0; cluster < clusterCount; cluster++)
487                                 {
488                                         data->occlusion[cluster] = 0;
489                                 }
490                         }
491
492                         #if PERF_PROFILE
493                                 for(int cluster = 0; cluster < clusterCount; cluster++)
494                                 {
495                                         for(int i = 0; i < PERF_TIMERS; i++)
496                                         {
497                                                 data->cycles[i][cluster] = 0;
498                                         }
499                                 }
500                         #endif
501
502                         // Viewport
503                         {
504                                 float W = 0.5f * viewport.width;
505                                 float H = 0.5f * viewport.height;
506                                 float X0 = viewport.x0 + W;
507                                 float Y0 = viewport.y0 + H;
508                                 float N = viewport.minZ;
509                                 float F = viewport.maxZ;
510                                 float Z = F - N;
511
512                                 if(context->isDrawTriangle(false))
513                                 {
514                                         N += depthBias;
515                                 }
516
517                                 if(complementaryDepthBuffer)
518                                 {
519                                         Z = -Z;
520                                         N = 1 - N;
521                                 }
522
523                                 static const float X[5][16] =   // Fragment offsets
524                                 {
525                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
526                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
527                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
528                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
529                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
530                                 };
531
532                                 static const float Y[5][16] =   // Fragment offsets
533                                 {
534                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
535                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
536                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
537                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
538                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
539                                 };
540
541                                 int s = sw::log2(ss);
542
543                                 data->Wx16 = replicate(W * 16);
544                                 data->Hx16 = replicate(H * 16);
545                                 data->X0x16 = replicate(X0 * 16 - 8);
546                                 data->Y0x16 = replicate(Y0 * 16 - 8);
547                                 data->XXXX = replicate(X[s][q] / W);
548                                 data->YYYY = replicate(Y[s][q] / H);
549                                 data->halfPixelX = replicate(0.5f / W);
550                                 data->halfPixelY = replicate(0.5f / H);
551                                 data->viewportHeight = abs(viewport.height);
552                                 data->slopeDepthBias = slopeDepthBias;
553                                 data->depthRange = Z;
554                                 data->depthNear = N;
555                                 draw->clipFlags = clipFlags;
556
557                                 if(clipFlags)
558                                 {
559                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
560                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
561                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
562                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
563                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
564                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
565                                 }
566                         }
567
568                         // Target
569                         {
570                                 for(int index = 0; index < 4; index++)
571                                 {
572                                         draw->renderTarget[index] = context->renderTarget[index];
573
574                                         if(draw->renderTarget[index])
575                                         {
576                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
577                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
578                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
579                                         }
580                                 }
581
582                                 draw->depthStencil = context->depthStencil;
583
584                                 if(draw->depthStencil)
585                                 {
586                                         data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
587                                         data->depthPitchB = context->depthStencil->getInternalPitchB();
588                                         data->depthSliceB = context->depthStencil->getInternalSliceB();
589
590                                         data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
591                                         data->stencilPitchB = context->depthStencil->getStencilPitchB();
592                                         data->stencilSliceB = context->depthStencil->getStencilSliceB();
593                                 }
594                         }
595
596                         // Scissor
597                         {
598                                 data->scissorX0 = scissor.x0;
599                                 data->scissorX1 = scissor.x1;
600                                 data->scissorY0 = scissor.y0;
601                                 data->scissorY1 = scissor.y1;
602                         }
603
604                         draw->primitive = 0;
605                         draw->count = count;
606
607                         draw->references = (count + batch - 1) / batch;
608
609                         schedulerMutex.lock();
610                         nextDraw++;
611                         schedulerMutex.unlock();
612
613                         if(threadCount > 1)
614                         {
615                                 if(!threadsAwake)
616                                 {
617                                         suspend[0]->wait();
618
619                                         threadsAwake = 1;
620                                         task[0].type = Task::RESUME;
621
622                                         resume[0]->signal();
623                                 }
624                         }
625                         else   // Use main thread for draw execution
626                         {
627                                 threadsAwake = 1;
628                                 task[0].type = Task::RESUME;
629
630                                 taskLoop(0);
631                         }
632                 }
633         }
634
635         void Renderer::threadFunction(void *parameters)
636         {
637                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
638                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
639
640                 if(logPrecision < IEEE)
641                 {
642                         CPUID::setFlushToZero(true);
643                         CPUID::setDenormalsAreZero(true);
644                 }
645
646                 renderer->threadLoop(threadIndex);
647         }
648
649         void Renderer::threadLoop(int threadIndex)
650         {
651                 while(!exitThreads)
652                 {
653                         taskLoop(threadIndex);
654
655                         suspend[threadIndex]->signal();
656                         resume[threadIndex]->wait();
657                 }
658         }
659
660         void Renderer::taskLoop(int threadIndex)
661         {
                    // Repeatedly fetch a task for this thread and execute it, until the
                    // fetched task is Task::SUSPEND. The condition is also tested before the
                    // first iteration, so nothing runs if the slot already holds SUSPEND.
                    // Called from threadLoop(), and with index 0 from the draw path when
                    // threadCount is not greater than 1.
662                 while(task[threadIndex].type != Task::SUSPEND)
663                 {
                            // scheduleTask() overwrites task[threadIndex]; executeTask() acts on it
                            // (SUSPEND and RESUME are no-ops there)
664                         scheduleTask(threadIndex);
665                         executeTask(threadIndex);
666                 }
667         }
668
669         void Renderer::findAvailableTasks()
670         {
                    // Appends runnable work to taskQueue (a ring of 32 entries written at
                    // qHead, with qSize entries pending): first pixel tasks for idle
                    // clusters, then primitive tasks for idle primitive units.
                    // The only caller visible here, scheduleTask(), invokes this with
                    // schedulerMutex held.
                    // NOTE(review): nothing in this function checks qSize against the 32
                    // slots; presumably the number of units plus clusters bounds it - confirm.

671                 // Find pixel tasks
672                 for(int cluster = 0; cluster < clusterCount; cluster++)
673                 {
674                         if(!pixelProgress[cluster].executing)
675                         {
                                    // Look for the batch this cluster has to render next: same draw
                                    // call, and starting exactly where the cluster left off.
676                                 for(int unit = 0; unit < unitCount; unit++)
677                                 {
678                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
679                                         {
680                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
681                                                 {
682                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
683                                                         {
684                                                                 Task &task = taskQueue[qHead];
685                                                                 task.type = Task::PIXELS;
686                                                                 task.primitiveUnit = unit;
687                                                                 task.pixelCluster = cluster;
688
                                                                    // Keeps this cluster from being scheduled again until
                                                                    // finishRendering() clears the flag
689                                                                 pixelProgress[cluster].executing = true;
690
691                                                                 // Commit to the task queue
692                                                                 qHead = (qHead + 1) % 32;
693                                                                 qSize++;
694
                                                                    // At most one pixel task per cluster
695                                                                 break;
696                                                         }
697                                                 }
698                                         }
699                                 }
700                         }
701                 }
702         
703                 // Find primitive tasks
704                 if(currentDraw == nextDraw)
705                 {
706                         return;   // No more primitives to process
707                 }
708
709                 for(int unit = 0; unit < unitCount; unit++)
710                 {
711                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
712
                            // All batches of the current draw have been handed out (draw->primitive
                            // advances by whole batches below, so it may overshoot count): move on
                            // to the next submitted draw, if there is one.
713                         if(draw->primitive >= draw->count)
714                         {
715                                 currentDraw++;
716
717                                 if(currentDraw == nextDraw)
718                                 {
719                                         return;   // No more primitives to process
720                                 }
721
722                                 draw = drawList[currentDraw % DRAW_COUNT];
723                         }
724
725                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
726                         {
727                                 int primitive = draw->primitive;
728                                 int count = draw->count;
729                                 int batch = draw->batchSize;
730
                                    // Describe the batch this unit will process; the last batch of a
                                    // draw is clamped to the primitives that remain.
731                                 primitiveProgress[unit].drawCall = currentDraw;
732                                 primitiveProgress[unit].firstPrimitive = primitive;
733                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
734
735                                 draw->primitive += batch;
736
737                                 Task &task = taskQueue[qHead];
738                                 task.type = Task::PRIMITIVES;
739                                 task.primitiveUnit = unit;
740
                                    // Negative: the unit is neither free (0) nor ready for pixel work
                                    // (> 0). executeTask() stores clusterCount here once setup is done.
741                                 primitiveProgress[unit].references = -1;
742
743                                 // Commit to the task queue
744                                 qHead = (qHead + 1) % 32;
745                                 qSize++;
746                         }
747                 }
748         }
749
750         void Renderer::scheduleTask(int threadIndex)
751         {
                    // Picks the next task for the calling thread and stores it in
                    // task[threadIndex]. If the queue is empty the thread's task becomes
                    // SUSPEND instead. All of this runs with schedulerMutex held.
752                 schedulerMutex.lock();
753
                    // Refill the queue only when it holds fewer tasks than the number of
                    // threads that are not awake (threadCount - threadsAwake) plus one.
754                 if((int)qSize < threadCount - threadsAwake + 1)
755                 {
756                         findAvailableTasks();
757                 }
758
759                 if(qSize != 0)
760                 {
                            // qHead is the next write slot and qSize entries are pending, so
                            // qHead - qSize is the oldest entry.
                            // NOTE(review): when qHead < qSize this relies on the subtraction
                            // wrapping as unsigned before the % 32 - confirm the declared types.
761                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
762                         qSize--;
763
764                         if(threadsAwake != threadCount)
765                         {
                                    // Number of suspended threads to resume, derived from the tasks
                                    // still queued and the threads already awake
766                                 int wakeup = qSize - threadsAwake + 1;
767
768                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
769                                 {
770                                         if(task[i].type == Task::SUSPEND)
771                                         {
                                                    // Wait until thread i has signalled suspend[i] (done in
                                                    // threadLoop after leaving taskLoop) before changing its
                                                    // task and releasing it through resume[i]
772                                                 suspend[i]->wait();
773                                                 task[i].type = Task::RESUME;
774                                                 resume[i]->signal();
775
776                                                 threadsAwake++;
777                                                 wakeup--;
778                                         }
779                                 }
780                         }
781                 }
782                 else
783                 {
                            // Nothing to do: taskLoop() exits when it sees SUSPEND
784                         task[threadIndex].type = Task::SUSPEND;
785
786                         threadsAwake--;
787                 }
788
789                 schedulerMutex.unlock();
790         }
791
792         void Renderer::executeTask(int threadIndex)
793         {
                    // Runs the task currently stored in task[threadIndex]:
                    //   PRIMITIVES - vertex processing and primitive setup for one batch
                    //   PIXELS     - pixel processing of one batch for one cluster
                    //   RESUME / SUSPEND - nothing to execute
                    // With PERF_HUD enabled, elapsed ticks are added to the per-thread
                    // vertexTime / setupTime / pixelTime counters.
794                 #if PERF_HUD
795                         int64_t startTick = Timer::ticks();
796                 #endif
797
798                 switch(task[threadIndex].type)
799                 {
800                 case Task::PRIMITIVES:
801                         {
802                                 int unit = task[threadIndex].primitiveUnit;
803                                 
                                    // Batch description written by findAvailableTasks()
804                                 int input = primitiveProgress[unit].firstPrimitive;
805                                 int count = primitiveProgress[unit].primitiveCount;
806                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
807                                 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
808
809                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
810
811                                 #if PERF_HUD
812                                         int64_t time = Timer::ticks();
813                                         vertexTime[threadIndex] += time - startTick;
814                                         startTick = time;
815                                 #endif
816
                                    // The unit index is what gets passed as the 'batch' argument
817                                 int visible = setupPrimitives(this, unit, count);
818
                                    // Store the result first, then make references positive: a positive
                                    // value is what findAvailableTasks() looks for when handing the
                                    // batch to pixel clusters, and finishRendering() decrements it once
                                    // per cluster.
819                                 primitiveProgress[unit].visible = visible;
820                                 primitiveProgress[unit].references = clusterCount;
821
822                                 #if PERF_HUD
823                                         setupTime[threadIndex] += Timer::ticks() - startTick;
824                                 #endif
825                         }
826                         break;
827                 case Task::PIXELS:
828                         {
829                                 int unit = task[threadIndex].primitiveUnit;
830                                 int visible = primitiveProgress[unit].visible;
831
                                    // The pixel routine is skipped when setup reported nothing visible
832                                 if(visible > 0)
833                                 {
834                                         int cluster = task[threadIndex].pixelCluster;
835                                         Primitive *primitive = primitiveBatch[unit];
836                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
837                                         DrawData *data = draw->data;
838                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
839
840                                         pixelRoutine(primitive, visible, cluster, data);
841                                 }
842
                                    // Always called, so progress and reference counts advance even for
                                    // batches with no visible primitives
843                                 finishRendering(task[threadIndex]);
844
845                                 #if PERF_HUD
846                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
847                                 #endif
848                         }
849                         break;
850                 case Task::RESUME:
851                         break;
852                 case Task::SUSPEND:
853                         break;
854                 default:
                            // Any other task type is unexpected
855                         ASSERT(false);
856                 }
857         }
858
859         void Renderer::synchronize()
860         {
                    // Acquires 'sync' with sw::PUBLIC and releases it again right away.
                    // finishRendering() releases 'sync' once for each draw that completes.
                    // NOTE(review): presumably the draw path holds 'sync' per draw in flight,
                    // so this call blocks until those draws have finished - confirm against
                    // the lock taken at draw submission.
861                 sync->lock(sw::PUBLIC);
862                 sync->unlock();
863         }
864
865         void Renderer::finishRendering(Task &pixelTask)
866         {
                    // Bookkeeping after a cluster has finished one batch (called from the
                    // PIXELS case of executeTask): advances the cluster's progress, drops
                    // one reference on the primitive unit and, when the last batch of the
                    // draw is done, releases everything the draw call held.
867                 int unit = pixelTask.primitiveUnit;
868                 int cluster = pixelTask.pixelCluster;
869
870                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
871                 DrawData &data = *draw.data;
872                 int primitive = primitiveProgress[unit].firstPrimitive;
873                 int count = primitiveProgress[unit].primitiveCount;
874
                    // findAvailableTasks() compares this against a unit's firstPrimitive to
                    // pick the cluster's next batch
875                 pixelProgress[cluster].processedPrimitives = primitive + count;
876
                    // End of the draw reached: the cluster moves on to the next draw call
                    // and starts counting from zero again
877                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
878                 {
879                         pixelProgress[cluster].drawCall++;
880                         pixelProgress[cluster].processedPrimitives = 0;
881                 }
882
                    // executeTask() set references to clusterCount after setup; every
                    // cluster that finishes the batch drops one.
                    // NOTE(review): the '== 0' tests below assume atomicDecrement returns
                    // the value after decrementing - confirm.
883                 int ref = atomicDecrement(&primitiveProgress[unit].references);
884
885                 if(ref == 0)
886                 {
                            // Last cluster for this batch: the unit can be reused (references is
                            // 0) and one batch of the draw is retired. draw.references was
                            // initialised to the number of batches when the draw was submitted.
887                         ref = atomicDecrement(&draw.references);
888
889                         if(ref == 0)
890                         {
                                    // Every batch of the draw has been rendered by every cluster
891                                 #if PERF_PROFILE
                                            // This 'cluster' shadows the outer one, which is used again at
                                            // the end of the function
892                                         for(int cluster = 0; cluster < clusterCount; cluster++)
893                                         {
894                                                 for(int i = 0; i < PERF_TIMERS; i++)
895                                                 {
896                                                         profiler.cycles[i] += data.cycles[i][cluster];
897                                                 }
898                                         }
899                                 #endif
900
901                                 if(draw.queries)
902                                 {
                                            // Add the per-cluster occlusion counts of this draw to each
                                            // attached query, then drop the draw's reference on the query
903                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
904                                         {
905                                                 Query *query = *q;
906
907                                                 for(int cluster = 0; cluster < clusterCount; cluster++)
908                                                 {
909                                                         atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
910                                                 }
911
912                                                 atomicDecrement(&query->reference);
913                                         }
914
                                            // Only the list is deleted here, not the Query objects
915                                         delete draw.queries;
916                                         draw.queries = 0;
917                                 }
918
                                    // Release the locks held on the draw's resources (the matching
                                    // lock calls are not in this part of the file)
919                                 for(int i = 0; i < 4; i++)
920                                 {
921                                         if(draw.renderTarget[i])
922                                         {
923                                                 draw.renderTarget[i]->unlockInternal();
924                                         }
925                                 }
926
927                                 if(draw.depthStencil)
928                                 {
929                                         draw.depthStencil->unlockInternal();
930                                         draw.depthStencil->unlockStencil();
931                                 }
932
933                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
934                                 {
935                                         if(draw.texture[i])
936                                         {
937                                                 draw.texture[i]->unlock();
938                                         }
939                                 }
940
941                                 for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
942                                 {
943                                         if(draw.vertexStream[i])
944                                         {
945                                                 draw.vertexStream[i]->unlock();
946                                         }
947                                 }
948
949                                 if(draw.indexBuffer)
950                                 {
951                                         draw.indexBuffer->unlock();
952                                 }
953
954                                 draw.vertexRoutine->unbind();
955                                 draw.setupRoutine->unbind();
956                                 draw.pixelRoutine->unbind();
957
                                    // One release of 'sync' per completed draw; synchronize() acquires
                                    // the same object
958                                 sync->unlock();
959
                                    // NOTE(review): -1 presumably marks this DrawCall slot as free for
                                    // the draw path, which is then woken through resumeApp - confirm
                                    // against the submission code.
960                                 draw.references = -1;
961                                 resumeApp->signal();
962                         }
963                 }
964
                    // The cluster may now be given another pixel task; findAvailableTasks()
                    // skips clusters whose 'executing' flag is set
965                 pixelProgress[cluster].executing = false;
966         }
967
968         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
969         {
970                 Triangle *triangle = triangleBatch[unit];
971                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
972                 DrawData *data = draw->data;
973                 VertexTask *task = vertexTask[thread];
974
975                 const void *indices = data->indices;
976                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
977
978                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
979                 {
980                         task->vertexCache.clear();
981                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
982                 }
983
984                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
985
986                 switch(draw->drawType)
987                 {
988                 case DRAW_POINTLIST:
989                         {
990                                 unsigned int index = start;
991
992                                 for(unsigned int i = 0; i < triangleCount; i++)
993                                 {
994                                         batch[i][0] = index;
995                                         batch[i][1] = index;
996                                         batch[i][2] = index;
997
998                                         index += 1;
999                                 }
1000                         }
1001                         break;
1002                 case DRAW_LINELIST:
1003                         {
1004                                 unsigned int index = 2 * start;
1005
1006                                 for(unsigned int i = 0; i < triangleCount; i++)
1007                                 {
1008                                         batch[i][0] = index + 0;
1009                                         batch[i][1] = index + 1;
1010                                         batch[i][2] = index + 1;
1011
1012                                         index += 2;
1013                                 }
1014                         }
1015                         break;
1016                 case DRAW_LINESTRIP:
1017                         {
1018                                 unsigned int index = start;
1019
1020                                 for(unsigned int i = 0; i < triangleCount; i++)
1021                                 {
1022                                         batch[i][0] = index + 0;
1023                                         batch[i][1] = index + 1;
1024                                         batch[i][2] = index + 1;
1025
1026                                         index += 1;
1027                                 }
1028                         }
1029                         break;
1030                 case DRAW_LINELOOP:
1031                         {
1032                                 unsigned int index = start;
1033
1034                                 for(unsigned int i = 0; i < triangleCount; i++)
1035                                 {
1036                                         batch[i][0] = (index + 0) % loop;
1037                                         batch[i][1] = (index + 1) % loop;
1038                                         batch[i][2] = (index + 1) % loop;
1039
1040                                         index += 1;
1041                                 }
1042                         }
1043                         break;
1044                 case DRAW_TRIANGLELIST:
1045                         {
1046                                 unsigned int index = 3 * start;
1047
1048                                 for(unsigned int i = 0; i < triangleCount; i++)
1049                                 {
1050                                         batch[i][0] = index + 0;
1051                                         batch[i][1] = index + 1;
1052                                         batch[i][2] = index + 2;
1053
1054                                         index += 3;
1055                                 }
1056                         }
1057                         break;
1058                 case DRAW_TRIANGLESTRIP:
1059                         {
1060                                 unsigned int index = start;
1061
1062                                 for(unsigned int i = 0; i < triangleCount; i++)
1063                                 {
1064                                         batch[i][0] = index + 0;
1065                                         batch[i][1] = index + (index & 1) + 1;
1066                                         batch[i][2] = index + (~index & 1) + 1;
1067
1068                                         index += 1;
1069                                 }
1070                         }
1071                         break;
1072                 case DRAW_TRIANGLEFAN:
1073                         {
1074                                 unsigned int index = start;
1075
1076                                 for(unsigned int i = 0; i < triangleCount; i++)
1077                                 {
1078                                         batch[i][0] = index + 1;
1079                                         batch[i][1] = index + 2;
1080                                         batch[i][2] = 0;
1081
1082                                         index += 1;
1083                                 }
1084                         }
1085                         break;
1086                 case DRAW_INDEXEDPOINTLIST8:
1087                         {
1088                                 const unsigned char *index = (const unsigned char*)indices + start;
1089
1090                                 for(unsigned int i = 0; i < triangleCount; i++)
1091                                 {
1092                                         batch[i][0] = *index;
1093                                         batch[i][1] = *index;
1094                                         batch[i][2] = *index;
1095
1096                                         index += 1;
1097                                 }
1098                         }
1099                         break;
1100                 case DRAW_INDEXEDPOINTLIST16:
1101                         {
1102                                 const unsigned short *index = (const unsigned short*)indices + start;
1103
1104                                 for(unsigned int i = 0; i < triangleCount; i++)
1105                                 {
1106                                         batch[i][0] = *index;
1107                                         batch[i][1] = *index;
1108                                         batch[i][2] = *index;
1109
1110                                         index += 1;
1111                                 }
1112                         }
1113                         break;
1114                 case DRAW_INDEXEDPOINTLIST32:
1115                         {
1116                                 const unsigned int *index = (const unsigned int*)indices + start;
1117
1118                                 for(unsigned int i = 0; i < triangleCount; i++)
1119                                 {
1120                                         batch[i][0] = *index;
1121                                         batch[i][1] = *index;
1122                                         batch[i][2] = *index;
1123
1124                                         index += 1;
1125                                 }
1126                         }
1127                         break;
1128                 case DRAW_INDEXEDLINELIST8:
1129                         {
1130                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1131
1132                                 for(unsigned int i = 0; i < triangleCount; i++)
1133                                 {
1134                                         batch[i][0] = index[0];
1135                                         batch[i][1] = index[1];
1136                                         batch[i][2] = index[1];
1137
1138                                         index += 2;
1139                                 }
1140                         }
1141                         break;
1142                 case DRAW_INDEXEDLINELIST16:
1143                         {
1144                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1145
1146                                 for(unsigned int i = 0; i < triangleCount; i++)
1147                                 {
1148                                         batch[i][0] = index[0];
1149                                         batch[i][1] = index[1];
1150                                         batch[i][2] = index[1];
1151
1152                                         index += 2;
1153                                 }
1154                         }
1155                         break;
1156                 case DRAW_INDEXEDLINELIST32:
1157                         {
1158                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1159
1160                                 for(unsigned int i = 0; i < triangleCount; i++)
1161                                 {
1162                                         batch[i][0] = index[0];
1163                                         batch[i][1] = index[1];
1164                                         batch[i][2] = index[1];
1165
1166                                         index += 2;
1167                                 }
1168                         }
1169                         break;
1170                 case DRAW_INDEXEDLINESTRIP8:
1171                         {
1172                                 const unsigned char *index = (const unsigned char*)indices + start;
1173
1174                                 for(unsigned int i = 0; i < triangleCount; i++)
1175                                 {
1176                                         batch[i][0] = index[0];
1177                                         batch[i][1] = index[1];
1178                                         batch[i][2] = index[1];
1179
1180                                         index += 1;
1181                                 }
1182                         }
1183                         break;
1184                 case DRAW_INDEXEDLINESTRIP16:
1185                         {
1186                                 const unsigned short *index = (const unsigned short*)indices + start;
1187
1188                                 for(unsigned int i = 0; i < triangleCount; i++)
1189                                 {
1190                                         batch[i][0] = index[0];
1191                                         batch[i][1] = index[1];
1192                                         batch[i][2] = index[1];
1193
1194                                         index += 1;
1195                                 }
1196                         }
1197                         break;
1198                 case DRAW_INDEXEDLINESTRIP32:
1199                         {
1200                                 const unsigned int *index = (const unsigned int*)indices + start;
1201
1202                                 for(unsigned int i = 0; i < triangleCount; i++)
1203                                 {
1204                                         batch[i][0] = index[0];
1205                                         batch[i][1] = index[1];
1206                                         batch[i][2] = index[1];
1207
1208                                         index += 1;
1209                                 }
1210                         }
1211                         break;
1212                 case DRAW_INDEXEDLINELOOP8:
1213                         {
1214                                 const unsigned char *index = (const unsigned char*)indices;
1215
1216                                 for(unsigned int i = 0; i < triangleCount; i++)
1217                                 {
1218                                         batch[i][0] = index[(start + i + 0) % loop];
1219                                         batch[i][1] = index[(start + i + 1) % loop];
1220                                         batch[i][2] = index[(start + i + 1) % loop];
1221                                 }
1222                         }
1223                         break;
1224                 case DRAW_INDEXEDLINELOOP16:
1225                         {
1226                                 const unsigned short *index = (const unsigned short*)indices;
1227
1228                                 for(unsigned int i = 0; i < triangleCount; i++)
1229                                 {
1230                                         batch[i][0] = index[(start + i + 0) % loop];
1231                                         batch[i][1] = index[(start + i + 1) % loop];
1232                                         batch[i][2] = index[(start + i + 1) % loop];
1233                                 }
1234                         }
1235                         break;
1236                 case DRAW_INDEXEDLINELOOP32:
1237                         {
1238                                 const unsigned int *index = (const unsigned int*)indices;
1239
1240                                 for(unsigned int i = 0; i < triangleCount; i++)
1241                                 {
1242                                         batch[i][0] = index[(start + i + 0) % loop];
1243                                         batch[i][1] = index[(start + i + 1) % loop];
1244                                         batch[i][2] = index[(start + i + 1) % loop];
1245                                 }
1246                         }
1247                         break;
1248                 case DRAW_INDEXEDTRIANGLELIST8:
1249                         {
1250                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1251
1252                                 for(unsigned int i = 0; i < triangleCount; i++)
1253                                 {
1254                                         batch[i][0] = index[0];
1255                                         batch[i][1] = index[1];
1256                                         batch[i][2] = index[2];
1257
1258                                         index += 3;
1259                                 }
1260                         }
1261                         break;
1262                 case DRAW_INDEXEDTRIANGLELIST16:
1263                         {
1264                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1265
1266                                 for(unsigned int i = 0; i < triangleCount; i++)
1267                                 {
1268                                         batch[i][0] = index[0];
1269                                         batch[i][1] = index[1];
1270                                         batch[i][2] = index[2];
1271
1272                                         index += 3;
1273                                 }
1274                         }
1275                         break;
1276                 case DRAW_INDEXEDTRIANGLELIST32:
1277                         {
1278                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1279
1280                                 for(unsigned int i = 0; i < triangleCount; i++)
1281                                 {
1282                                         batch[i][0] = index[0];
1283                                         batch[i][1] = index[1];
1284                                         batch[i][2] = index[2];
1285
1286                                         index += 3;
1287                                 }
1288                         }
1289                         break;
1290                 case DRAW_INDEXEDTRIANGLESTRIP8:
1291                         {
1292                                 const unsigned char *index = (const unsigned char*)indices + start;
1293
1294                                 for(unsigned int i = 0; i < triangleCount; i++)
1295                                 {
1296                                         batch[i][0] = index[0];
1297                                         batch[i][1] = index[((start + i) & 1) + 1];
1298                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1299
1300                                         index += 1;
1301                                 }
1302                         }
1303                         break;
1304                 case DRAW_INDEXEDTRIANGLESTRIP16:
1305                         {
1306                                 const unsigned short *index = (const unsigned short*)indices + start;
1307
1308                                 for(unsigned int i = 0; i < triangleCount; i++)
1309                                 {
1310                                         batch[i][0] = index[0];
1311                                         batch[i][1] = index[((start + i) & 1) + 1];
1312                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1313
1314                                         index += 1;
1315                                 }
1316                         }
1317                         break;
1318                 case DRAW_INDEXEDTRIANGLESTRIP32:
1319                         {
1320                                 const unsigned int *index = (const unsigned int*)indices + start;
1321
1322                                 for(unsigned int i = 0; i < triangleCount; i++)
1323                                 {
1324                                         batch[i][0] = index[0];
1325                                         batch[i][1] = index[((start + i) & 1) + 1];
1326                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1327
1328                                         index += 1;
1329                                 }
1330                         }
1331                         break;
1332                 case DRAW_INDEXEDTRIANGLEFAN8:
1333                         {
1334                                 const unsigned char *index = (const unsigned char*)indices;
1335
1336                                 for(unsigned int i = 0; i < triangleCount; i++)
1337                                 {
1338                                         batch[i][0] = index[start + i + 1];
1339                                         batch[i][1] = index[start + i + 2];
1340                                         batch[i][2] = index[0];
1341                                 }
1342                         }
1343                         break;
1344                 case DRAW_INDEXEDTRIANGLEFAN16:
1345                         {
1346                                 const unsigned short *index = (const unsigned short*)indices;
1347
1348                                 for(unsigned int i = 0; i < triangleCount; i++)
1349                                 {
1350                                         batch[i][0] = index[start + i + 1];
1351                                         batch[i][1] = index[start + i + 2];
1352                                         batch[i][2] = index[0];
1353                                 }
1354                         }
1355                         break;
1356                 case DRAW_INDEXEDTRIANGLEFAN32:
1357                         {
1358                                 const unsigned int *index = (const unsigned int*)indices;
1359
1360                                 for(unsigned int i = 0; i < triangleCount; i++)
1361                                 {
1362                                         batch[i][0] = index[start + i + 1];
1363                                         batch[i][1] = index[start + i + 2];
1364                                         batch[i][2] = index[0];
1365                                 }
1366                         }
1367                         break;
1368         case DRAW_QUADLIST:
1369                         {
1370                                 unsigned int index = 4 * start / 2;
1371
1372                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1373                                 {
1374                                         batch[i+0][0] = index + 0;
1375                                         batch[i+0][1] = index + 1;
1376                                         batch[i+0][2] = index + 2;
1377
1378                     batch[i+1][0] = index + 0;
1379                                         batch[i+1][1] = index + 2;
1380                                         batch[i+1][2] = index + 3;
1381
1382                                         index += 4;
1383                                 }
1384                         }
1385                         break;
1386                 default:
1387                         ASSERT(false);
1388                 }
1389
1390                 task->vertexCount = triangleCount * 3;
1391                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1392         }
1393
1394         int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1395         {
1396                 Triangle *triangle = renderer->triangleBatch[unit];
1397                 Primitive *primitive = renderer->primitiveBatch[unit];
1398
1399                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1400                 SetupProcessor::State &state = draw.setupState;
1401                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1402
1403                 int ms = state.multiSample;
1404                 int pos = state.positionRegister;
1405                 const DrawData *data = draw.data;
1406                 int visible = 0;
1407
1408                 for(int i = 0; i < count; i++, triangle++)
1409                 {
1410                         Vertex &v0 = triangle->v0;
1411                         Vertex &v1 = triangle->v1;
1412                         Vertex &v2 = triangle->v2;
1413
1414                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1415                         {
1416                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1417
1418                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1419
1420                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1421                                 {
1422                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1423                                         {
1424                                                 continue;
1425                                         }
1426                                 }
1427
1428                                 if(setupRoutine(primitive, triangle, &polygon, data))
1429                                 {
1430                                         primitive += ms;
1431                                         visible++;
1432                                 }
1433                         }
1434                 }
1435
1436                 return visible;
1437         }
1438
1439         int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1440         {
1441                 Triangle *triangle = renderer->triangleBatch[unit];
1442                 Primitive *primitive = renderer->primitiveBatch[unit];
1443                 int visible = 0;
1444
1445                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1446                 SetupProcessor::State &state = draw.setupState;
1447                 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1448
1449                 const Vertex &v0 = triangle[0].v0;
1450                 const Vertex &v1 = triangle[0].v1;
1451                 const Vertex &v2 = triangle[0].v2;
1452
1453                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1454
1455                 if(state.cullMode == CULL_CLOCKWISE)
1456                 {
1457                         if(d >= 0) return 0;
1458                 }
1459                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1460                 {
1461                         if(d <= 0) return 0;
1462                 }
1463
1464                 // Copy attributes
1465                 triangle[1].v0 = v1;
1466                 triangle[1].v1 = v2;
1467                 triangle[2].v0 = v2;
1468                 triangle[2].v1 = v0;
1469
1470                 if(state.color[0][0].flat)   // FIXME
1471                 {
1472                         for(int i = 0; i < 2; i++)
1473                         {
1474                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1475                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1476                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1477                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1478                         }
1479                 }
1480
1481                 for(int i = 0; i < 3; i++)
1482                 {
1483                         if(setupLine(renderer, *primitive, *triangle, draw))
1484                         {
1485                                 primitive->area = 0.5f * d;
1486
1487                                 primitive++;
1488                                 visible++;
1489                         }
1490
1491                         triangle++;
1492                 }
1493
1494                 return visible;
1495         }
1496         
1497         int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1498         {
1499                 Triangle *triangle = renderer->triangleBatch[unit];
1500                 Primitive *primitive = renderer->primitiveBatch[unit];
1501                 int visible = 0;
1502
1503                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1504                 SetupProcessor::State &state = draw.setupState;
1505
1506                 const Vertex &v0 = triangle[0].v0;
1507                 const Vertex &v1 = triangle[0].v1;
1508                 const Vertex &v2 = triangle[0].v2;
1509
1510                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1511
1512                 if(state.cullMode == CULL_CLOCKWISE)
1513                 {
1514                         if(d >= 0) return 0;
1515                 }
1516                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1517                 {
1518                         if(d <= 0) return 0;
1519                 }
1520
1521                 // Copy attributes
1522                 triangle[1].v0 = v1;
1523                 triangle[2].v0 = v2;
1524
1525                 for(int i = 0; i < 3; i++)
1526                 {
1527                         if(setupPoint(renderer, *primitive, *triangle, draw))
1528                         {
1529                                 primitive->area = 0.5f * d;
1530
1531                                 primitive++;
1532                                 visible++;
1533                         }
1534
1535                         triangle++;
1536                 }
1537
1538                 return visible;
1539         }
1540
1541         int Renderer::setupLines(Renderer *renderer, int unit, int count)
1542         {
1543                 Triangle *triangle = renderer->triangleBatch[unit];
1544                 Primitive *primitive = renderer->primitiveBatch[unit];
1545                 int visible = 0;
1546
1547                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1548                 SetupProcessor::State &state = draw.setupState;
1549
1550                 int ms = state.multiSample;
1551
1552                 for(int i = 0; i < count; i++)
1553                 {
1554                         if(setupLine(renderer, *primitive, *triangle, draw))
1555                         {
1556                                 primitive += ms;
1557                                 visible++;
1558                         }
1559
1560                         triangle++;
1561                 }
1562
1563                 return visible;
1564         }
1565
1566         int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1567         {
1568                 Triangle *triangle = renderer->triangleBatch[unit];
1569                 Primitive *primitive = renderer->primitiveBatch[unit];
1570                 int visible = 0;
1571
1572                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1573                 SetupProcessor::State &state = draw.setupState;
1574
1575                 int ms = state.multiSample;
1576
1577                 for(int i = 0; i < count; i++)
1578                 {
1579                         if(setupPoint(renderer, *primitive, *triangle, draw))
1580                         {
1581                                 primitive += ms;
1582                                 visible++;
1583                         }
1584
1585                         triangle++;
1586                 }
1587
1588                 return visible;
1589         }
1590
1591         bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1592         {
1593                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1594                 const SetupProcessor::State &state = draw.setupState;
1595                 const DrawData &data = *draw.data;
1596
1597                 float lineWidth = data.lineWidth;
1598
1599                 Vertex &v0 = triangle.v0;
1600                 Vertex &v1 = triangle.v1;
1601
1602                 int pos = state.positionRegister;
1603
1604                 const float4 &P0 = v0.v[pos];
1605                 const float4 &P1 = v1.v[pos];
1606
1607                 if(P0.w <= 0 && P1.w <= 0)
1608                 {
1609                         return false;
1610                 }
1611
1612                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1613                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1614
1615                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1616                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1617
1618                 if(dx == 0 && dy == 0)
1619                 {
1620                         return false;
1621                 }
1622
1623                 if(false)   // Rectangle
1624                 {
1625                         float4 P[4];
1626                         int C[4];
1627
1628                         P[0] = P0;
1629                         P[1] = P1;
1630                         P[2] = P1;
1631                         P[3] = P0;
1632
1633                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1634
1635                         dx *= scale;
1636                         dy *= scale;
1637
1638                         float dx0w = dx * P0.w / W;
1639                         float dy0h = dy * P0.w / H;
1640                         float dx0h = dx * P0.w / H;
1641                         float dy0w = dy * P0.w / W;
1642
1643                         float dx1w = dx * P1.w / W;
1644                         float dy1h = dy * P1.w / H;
1645                         float dx1h = dx * P1.w / H;
1646                         float dy1w = dy * P1.w / W;
1647
1648                         P[0].x += -dy0w + -dx0w;
1649                         P[0].y += -dx0h + +dy0h;
1650                         C[0] = computeClipFlags(P[0], data);
1651
1652                         P[1].x += -dy1w + +dx1w;
1653                         P[1].y += -dx1h + +dy1h;
1654                         C[1] = computeClipFlags(P[1], data);
1655
1656                         P[2].x += +dy1w + +dx1w;
1657                         P[2].y += +dx1h + -dy1h;
1658                         C[2] = computeClipFlags(P[2], data);
1659
1660                         P[3].x += +dy0w + -dx0w;
1661                         P[3].y += +dx0h + +dy0h;
1662                         C[3] = computeClipFlags(P[3], data);
1663
1664                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1665                         {
1666                                 Polygon polygon(P, 4);
1667
1668                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1669
1670                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1671                                 {
1672                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1673                                         {
1674                                                 return false;
1675                                         }
1676                                 }
1677
1678                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1679                         }
1680                 }
1681                 else   // Diamond test convention
1682                 {
1683                         float4 P[8];
1684                         int C[8];
1685
1686                         P[0] = P0;
1687                         P[1] = P0;
1688                         P[2] = P0;
1689                         P[3] = P0;
1690                         P[4] = P1;
1691                         P[5] = P1;
1692                         P[6] = P1;
1693                         P[7] = P1;
1694
1695                         float dx0 = lineWidth * 0.5f * P0.w / W;
1696                         float dy0 = lineWidth * 0.5f * P0.w / H;
1697
1698                         float dx1 = lineWidth * 0.5f * P1.w / W;
1699                         float dy1 = lineWidth * 0.5f * P1.w / H;
1700
1701                         P[0].x += -dx0;
1702                         C[0] = computeClipFlags(P[0], data);
1703
1704                         P[1].y += +dy0;
1705                         C[1] = computeClipFlags(P[1], data);
1706
1707                         P[2].x += +dx0;
1708                         C[2] = computeClipFlags(P[2], data);
1709
1710                         P[3].y += -dy0;
1711                         C[3] = computeClipFlags(P[3], data);
1712
1713                         P[4].x += -dx1;
1714                         C[4] = computeClipFlags(P[4], data);
1715
1716                         P[5].y += +dy1;
1717                         C[5] = computeClipFlags(P[5], data);
1718
1719                         P[6].x += +dx1;
1720                         C[6] = computeClipFlags(P[6], data);
1721
1722                         P[7].y += -dy1;
1723                         C[7] = computeClipFlags(P[7], data);
1724
1725                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1726                         {
1727                                 float4 L[6];
1728
1729                                 if(dx > -dy)
1730                                 {
1731                                         if(dx > dy)   // Right
1732                                         {
1733                                                 L[0] = P[0];
1734                                                 L[1] = P[1];
1735                                                 L[2] = P[5];
1736                                                 L[3] = P[6];
1737                                                 L[4] = P[7];
1738                                                 L[5] = P[3];
1739                                         }
1740                                         else   // Down
1741                                         {
1742                                                 L[0] = P[0];
1743                                                 L[1] = P[4];
1744                                                 L[2] = P[5];
1745                                                 L[3] = P[6];
1746                                                 L[4] = P[2];
1747                                                 L[5] = P[3];
1748                                         }
1749                                 }
1750                                 else
1751                                 {
1752                                         if(dx > dy)   // Up
1753                                         {
1754                                                 L[0] = P[0];
1755                                                 L[1] = P[1];
1756                                                 L[2] = P[2];
1757                                                 L[3] = P[6];
1758                                                 L[4] = P[7];
1759                                                 L[5] = P[4];
1760                                         }
1761                                         else   // Left
1762                                         {
1763                                                 L[0] = P[1];
1764                                                 L[1] = P[2];
1765                                                 L[2] = P[3];
1766                                                 L[3] = P[7];
1767                                                 L[4] = P[4];
1768                                                 L[5] = P[5];
1769                                         }
1770                                 }
1771
1772                                 Polygon polygon(L, 6);
1773
1774                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1775
1776                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1777                                 {
1778                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1779                                         {
1780                                                 return false;
1781                                         }
1782                                 }
1783
1784                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1785                         }
1786                 }
1787
1788                 return false;
1789         }
1790
1791         bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1792         {
1793                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1794                 const SetupProcessor::State &state = draw.setupState;
1795                 const DrawData &data = *draw.data;
1796
1797                 Vertex &v = triangle.v0;
1798
1799                 float pSize;
1800
1801                 int pts = state.pointSizeRegister;
1802
1803                 if(state.pointSizeRegister != 0xF)
1804                 {
1805                         pSize = v.v[pts].y;
1806                 }
1807                 else
1808                 {
1809                         pSize = data.point.pointSize[0];
1810                 }
1811
1812                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1813
1814                 float4 P[4];
1815                 int C[4];
1816
1817                 int pos = state.positionRegister;
1818
1819                 P[0] = v.v[pos];
1820                 P[1] = v.v[pos];
1821                 P[2] = v.v[pos];
1822                 P[3] = v.v[pos];
1823
1824                 const float X = pSize * P[0].w * data.halfPixelX[0];
1825                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1826
1827                 P[0].x -= X;
1828                 P[0].y += Y;
1829                 C[0] = computeClipFlags(P[0], data);
1830
1831                 P[1].x += X;
1832                 P[1].y += Y;
1833                 C[1] = computeClipFlags(P[1], data);
1834
1835                 P[2].x += X;
1836                 P[2].y -= Y;
1837                 C[2] = computeClipFlags(P[2], data);
1838
1839                 P[3].x -= X;
1840                 P[3].y -= Y;
1841                 C[3] = computeClipFlags(P[3], data);
1842
1843                 triangle.v1 = triangle.v0;
1844                 triangle.v2 = triangle.v0;
1845
1846                 triangle.v1.X += iround(16 * 0.5f * pSize);
1847                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1848
1849                 Polygon polygon(P, 4);
1850
1851                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1852                 {
1853                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1854
1855                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1856                         {
1857                                 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1858                                 {
1859                                         return false;
1860                                 }
1861                         }
1862                         
1863                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1864                 }
1865
1866                 return false;
1867         }
1868
1869         unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1870         {
1871                 return ((v.x > v.w)  << 0) |
1872                            ((v.y > v.w)  << 1) |
1873                            ((v.z > v.w)  << 2) |
1874                            ((v.x < -v.w) << 3) |
1875                        ((v.y < -v.w) << 4) |
1876                            ((v.z < 0)    << 5) |
1877                            Clipper::CLIP_FINITE;   // FIXME: xyz finite
1878         }
1879
1880         void Renderer::initializeThreads()
1881         {
1882                 unitCount = ceilPow2(threadCount);
1883                 clusterCount = ceilPow2(threadCount);
1884
1885                 for(int i = 0; i < unitCount; i++)
1886                 {
1887                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1888                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1889                 }
1890
1891                 for(int i = 0; i < threadCount; i++)
1892                 {
1893                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1894                         vertexTask[i]->vertexCache.drawCall = -1;
1895
1896                         task[i].type = Task::SUSPEND;
1897
1898                         resume[i] = new Event();
1899                         suspend[i] = new Event();
1900
1901                         Parameters parameters;
1902                         parameters.threadIndex = i;
1903                         parameters.renderer = this;
1904
1905                         exitThreads = false;
1906                         worker[i] = new Thread(threadFunction, &parameters);
1907
1908                         suspend[i]->wait();
1909                         suspend[i]->signal();
1910                 }
1911         }
1912
1913         void Renderer::terminateThreads()
1914         {
1915                 while(threadsAwake != 0)
1916                 {
1917                         Thread::sleep(1);
1918                 }
1919
1920                 for(int thread = 0; thread < threadCount; thread++)
1921                 {
1922                         if(worker[thread])
1923                         {
1924                                 exitThreads = true;
1925                                 resume[thread]->signal();
1926                                 worker[thread]->join();
1927                                 
1928                                 delete worker[thread];
1929                                 worker[thread] = 0;
1930                                 delete resume[thread];
1931                                 resume[thread] = 0;
1932                                 delete suspend[thread];
1933                                 suspend[thread] = 0;
1934                         }
1935                 
1936                         deallocate(vertexTask[thread]);
1937                         vertexTask[thread] = 0;
1938                 }
1939
1940                 for(int i = 0; i < 16; i++)
1941                 {
1942                         deallocate(triangleBatch[i]);
1943                         triangleBatch[i] = 0;
1944
1945                         deallocate(primitiveBatch[i]);
1946                         primitiveBatch[i] = 0;
1947                 }
1948         }
1949
1950         void Renderer::loadConstants(const VertexShader *vertexShader)
1951         {
1952                 if(!vertexShader) return;
1953
1954                 size_t count = vertexShader->getLength();
1955
1956                 for(size_t i = 0; i < count; i++)
1957                 {
1958                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
1959
1960                         if(instruction->opcode == Shader::OPCODE_DEF)
1961                         {
1962                                 int index = instruction->dst.index;
1963                                 float value[4];
1964
1965                                 value[0] = instruction->src[0].value[0];
1966                                 value[1] = instruction->src[0].value[1];
1967                                 value[2] = instruction->src[0].value[2];
1968                                 value[3] = instruction->src[0].value[3];
1969
1970                                 setVertexShaderConstantF(index, value);
1971                         }
1972                         else if(instruction->opcode == Shader::OPCODE_DEFI)
1973                         {
1974                                 int index = instruction->dst.index;
1975                                 int integer[4];
1976
1977                                 integer[0] = instruction->src[0].integer[0];
1978                                 integer[1] = instruction->src[0].integer[1];
1979                                 integer[2] = instruction->src[0].integer[2];
1980                                 integer[3] = instruction->src[0].integer[3];
1981
1982                                 setVertexShaderConstantI(index, integer);
1983                         }
1984                         else if(instruction->opcode == Shader::OPCODE_DEFB)
1985                         {
1986                                 int index = instruction->dst.index;
1987                                 int boolean = instruction->src[0].boolean[0];
1988
1989                                 setVertexShaderConstantB(index, &boolean);
1990                         }
1991                 }
1992         }
1993
1994         void Renderer::loadConstants(const PixelShader *pixelShader)
1995         {
1996                 if(!pixelShader) return;
1997
1998                 size_t count = pixelShader->getLength();
1999
2000                 for(size_t i = 0; i < count; i++)
2001                 {
2002                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2003
2004                         if(instruction->opcode == Shader::OPCODE_DEF)
2005                         {
2006                                 int index = instruction->dst.index;
2007                                 float value[4];
2008
2009                                 value[0] = instruction->src[0].value[0];
2010                                 value[1] = instruction->src[0].value[1];
2011                                 value[2] = instruction->src[0].value[2];
2012                                 value[3] = instruction->src[0].value[3];
2013
2014                                 setPixelShaderConstantF(index, value);
2015                         }
2016                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2017                         {
2018                                 int index = instruction->dst.index;
2019                                 int integer[4];
2020
2021                                 integer[0] = instruction->src[0].integer[0];
2022                                 integer[1] = instruction->src[0].integer[1];
2023                                 integer[2] = instruction->src[0].integer[2];
2024                                 integer[3] = instruction->src[0].integer[3];
2025
2026                                 setPixelShaderConstantI(index, integer);
2027                         }
2028                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2029                         {
2030                                 int index = instruction->dst.index;
2031                                 int boolean = instruction->src[0].boolean[0];
2032
2033                                 setPixelShaderConstantB(index, &boolean);
2034                         }
2035                 }
2036         }
2037
2038         void Renderer::setIndexBuffer(Resource *indexBuffer)
2039         {
2040                 context->indexBuffer = indexBuffer;
2041         }
2042
2043         void Renderer::setMultiSampleMask(unsigned int mask)
2044         {
2045                 context->sampleMask = mask;
2046         }
2047
2048         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2049         {
2050                 sw::transparencyAntialiasing = transparencyAntialiasing;
2051         }
2052
2053         bool Renderer::isReadWriteTexture(int sampler)
2054         {
2055                 for(int index = 0; index < 4; index++)
2056                 {
2057                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2058                         {
2059                                 return true;
2060                         }
2061                 }
2062         
2063                 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
2064                 {
2065                         return true;
2066                 }
2067
2068                 return false;
2069         }
2070         
2071         void Renderer::updateClipper()
2072         {
2073                 if(updateClipPlanes)
2074                 {
2075                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2076                         {
2077                                 const Matrix &scissorWorld = getViewTransform();
2078
2079                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2080                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2081                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2082                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2083                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2084                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2085                         }
2086                         else   // User plane in clip space
2087                         {
2088                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2089                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2090                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2091                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2092                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2093                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2094                         }
2095
2096                         updateClipPlanes = false;
2097                 }
2098         }
2099
2100         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2101         {
2102                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2103
2104                 context->texture[sampler] = resource;
2105         }
2106
2107         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2108         {
2109                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2110                 
2111                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2112         }
2113
2114         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2115         {
2116                 if(type == SAMPLER_PIXEL)
2117                 {
2118                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2119                 }
2120                 else
2121                 {
2122                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2123                 }
2124         }
2125
2126         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2127         {
2128                 if(type == SAMPLER_PIXEL)
2129                 {
2130                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2131                 }
2132                 else
2133                 {
2134                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2135                 }
2136         }
2137
2138         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2139         {
2140                 if(type == SAMPLER_PIXEL)
2141                 {
2142                         PixelProcessor::setGatherEnable(sampler, enable);
2143                 }
2144                 else
2145                 {
2146                         VertexProcessor::setGatherEnable(sampler, enable);
2147                 }
2148         }
2149
2150         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2151         {
2152                 if(type == SAMPLER_PIXEL)
2153                 {
2154                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2155                 }
2156                 else
2157                 {
2158                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2159                 }
2160         }
2161
2162         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2163         {
2164                 if(type == SAMPLER_PIXEL)
2165                 {
2166                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2167                 }
2168                 else
2169                 {
2170                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2171                 }
2172         }
2173
2174         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2175         {
2176                 if(type == SAMPLER_PIXEL)
2177                 {
2178                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2179                 }
2180                 else
2181                 {
2182                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2183                 }
2184         }
2185
2186         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2187         {
2188                 if(type == SAMPLER_PIXEL)
2189                 {
2190                         PixelProcessor::setReadSRGB(sampler, sRGB);
2191                 }
2192                 else
2193                 {
2194                         VertexProcessor::setReadSRGB(sampler, sRGB);
2195                 }
2196         }
2197
2198         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2199         {
2200                 if(type == SAMPLER_PIXEL)
2201                 {
2202                         PixelProcessor::setMipmapLOD(sampler, bias);
2203                 }
2204                 else
2205                 {
2206                         VertexProcessor::setMipmapLOD(sampler, bias);
2207                 }
2208         }
2209
2210         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2211         {
2212                 if(type == SAMPLER_PIXEL)
2213                 {
2214                         PixelProcessor::setBorderColor(sampler, borderColor);
2215                 }
2216                 else
2217                 {
2218                         VertexProcessor::setBorderColor(sampler, borderColor);
2219                 }
2220         }
2221
2222         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2223         {
2224                 if(type == SAMPLER_PIXEL)
2225                 {
2226                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2227                 }
2228                 else
2229                 {
2230                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2231                 }
2232         }
2233
2234         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2235         {
2236                 context->setPointSpriteEnable(pointSpriteEnable);
2237         }
2238
2239         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2240         {
2241                 context->setPointScaleEnable(pointScaleEnable);
2242         }
2243
2244         void Renderer::setLineWidth(float width)
2245         {
2246                 context->lineWidth = width;
2247         }
2248
2249         void Renderer::setDepthBias(float bias)
2250         {
2251                 depthBias = bias;
2252         }
2253
2254         void Renderer::setSlopeDepthBias(float slopeBias)
2255         {
2256                 slopeDepthBias = slopeBias;
2257         }
2258
2259         void Renderer::setPixelShader(const PixelShader *shader)
2260         {
2261                 context->pixelShader = shader;
2262
2263                 loadConstants(shader);
2264         }
2265
2266         void Renderer::setVertexShader(const VertexShader *shader)
2267         {
2268                 context->vertexShader = shader;
2269
2270                 loadConstants(shader);
2271         }
2272
2273         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2274         {
2275                 for(int i = 0; i < DRAW_COUNT; i++)
2276                 {
2277                         if(drawCall[i]->psDirtyConstF < index + count)
2278                         {
2279                                 drawCall[i]->psDirtyConstF = index + count;
2280                         }
2281                 }
2282
2283                 for(int i = 0; i < count; i++)
2284                 {
2285                         PixelProcessor::setFloatConstant(index + i, value);
2286                         value += 4;
2287                 }
2288         }
2289
2290         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2291         {
2292                 for(int i = 0; i < DRAW_COUNT; i++)
2293                 {
2294                         if(drawCall[i]->psDirtyConstI < index + count)
2295                         {
2296                                 drawCall[i]->psDirtyConstI = index + count;
2297                         }
2298                 }
2299
2300                 for(int i = 0; i < count; i++)
2301                 {
2302                         PixelProcessor::setIntegerConstant(index + i, value);
2303                         value += 4;
2304                 }
2305         }
2306
2307         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2308         {
2309                 for(int i = 0; i < DRAW_COUNT; i++)
2310                 {
2311                         if(drawCall[i]->psDirtyConstB < index + count)
2312                         {
2313                                 drawCall[i]->psDirtyConstB = index + count;
2314                         }
2315                 }
2316
2317                 for(int i = 0; i < count; i++)
2318                 {
2319                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2320                         boolean++;
2321                 }
2322         }
2323
2324         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2325         {
2326                 for(int i = 0; i < DRAW_COUNT; i++)
2327                 {
2328                         if(drawCall[i]->vsDirtyConstF < index + count)
2329                         {
2330                                 drawCall[i]->vsDirtyConstF = index + count;
2331                         }
2332                 }
2333
2334                 for(int i = 0; i < count; i++)
2335                 {
2336                         VertexProcessor::setFloatConstant(index + i, value);
2337                         value += 4;
2338                 }
2339         }
2340
2341         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2342         {
2343                 for(int i = 0; i < DRAW_COUNT; i++)
2344                 {
2345                         if(drawCall[i]->vsDirtyConstI < index + count)
2346                         {
2347                                 drawCall[i]->vsDirtyConstI = index + count;
2348                         }
2349                 }
2350
2351                 for(int i = 0; i < count; i++)
2352                 {
2353                         VertexProcessor::setIntegerConstant(index + i, value);
2354                         value += 4;
2355                 }
2356         }
2357
2358         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2359         {
2360                 for(int i = 0; i < DRAW_COUNT; i++)
2361                 {
2362                         if(drawCall[i]->vsDirtyConstB < index + count)
2363                         {
2364                                 drawCall[i]->vsDirtyConstB = index + count;
2365                         }
2366                 }
2367
2368                 for(int i = 0; i < count; i++)
2369                 {
2370                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2371                         boolean++;
2372                 }
2373         }
2374
2375         void Renderer::setModelMatrix(const Matrix &M, int i)
2376         {
2377                 VertexProcessor::setModelMatrix(M, i);
2378         }
2379
2380         void Renderer::setViewMatrix(const Matrix &V)
2381         {
2382                 VertexProcessor::setViewMatrix(V);
2383                 updateClipPlanes = true;
2384         }
2385
2386         void Renderer::setBaseMatrix(const Matrix &B)
2387         {
2388                 VertexProcessor::setBaseMatrix(B);
2389                 updateClipPlanes = true;
2390         }
2391
2392         void Renderer::setProjectionMatrix(const Matrix &P)
2393         {
2394                 VertexProcessor::setProjectionMatrix(P);
2395                 updateClipPlanes = true;
2396         }
2397
2398         void Renderer::addQuery(Query *query)
2399         {
2400                 queries.push_back(query);
2401         }
2402         
2403         void Renderer::removeQuery(Query *query)
2404         {
2405                 queries.remove(query);
2406         }
2407
2408         #if PERF_HUD
2409                 int Renderer::getThreadCount()
2410                 {
2411                         return threadCount;
2412                 }
2413                 
2414                 int64_t Renderer::getVertexTime(int thread)
2415                 {
2416                         return vertexTime[thread];
2417                 }
2418
2419                 int64_t Renderer::getSetupTime(int thread)
2420                 {
2421                         return setupTime[thread];
2422                 }
2423                         
2424                 int64_t Renderer::getPixelTime(int thread)
2425                 {
2426                         return pixelTime[thread];
2427                 }
2428
2429                 void Renderer::resetTimers()
2430                 {
2431                         for(int thread = 0; thread < threadCount; thread++)
2432                         {
2433                                 vertexTime[thread] = 0;
2434                                 setupTime[thread] = 0;
2435                                 pixelTime[thread] = 0;
2436                         }
2437                 }
2438         #endif
2439
2440         void Renderer::setViewport(const Viewport &viewport)
2441         {
2442                 this->viewport = viewport;
2443         }
2444
2445         void Renderer::setScissor(const Rect &scissor)
2446         {
2447                 this->scissor = scissor;
2448         }
2449
2450         void Renderer::setClipFlags(int flags)
2451         {
2452                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2453         }
2454
2455         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2456         {
2457                 if(index < MAX_CLIP_PLANES)
2458                 {
2459                         userPlane[index] = plane;
2460                 }
2461                 else ASSERT(false);
2462
2463                 updateClipPlanes = true;
2464         }
2465
2466         void Renderer::updateConfiguration(bool initialUpdate)
2467         {
2468                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2469
2470                 if(newConfiguration || initialUpdate)
2471                 {
2472                         terminateThreads();
2473
2474                         SwiftConfig::Configuration configuration = {0};
2475                         swiftConfig->getConfiguration(configuration);
2476
2477                         precacheVertex = !newConfiguration && configuration.precache;
2478                         precacheSetup = !newConfiguration && configuration.precache;
2479                         precachePixel = !newConfiguration && configuration.precache;
2480
2481                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2482                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2483                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2484
2485                         switch(configuration.textureSampleQuality)
2486                         {
2487                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2488                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2489                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2490                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2491                         }
2492
2493                         switch(configuration.mipmapQuality)
2494                         {
2495                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2496                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2497                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2498                         }
2499
2500                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2501
2502                         switch(configuration.transcendentalPrecision)
2503                         {
2504                         case 0:
2505                                 logPrecision = APPROXIMATE;
2506                                 expPrecision = APPROXIMATE;
2507                                 rcpPrecision = APPROXIMATE;
2508                                 rsqPrecision = APPROXIMATE;
2509                                 break;
2510                         case 1:
2511                                 logPrecision = PARTIAL;
2512                                 expPrecision = PARTIAL;
2513                                 rcpPrecision = PARTIAL;
2514                                 rsqPrecision = PARTIAL;
2515                                 break;
2516                         case 2:
2517                                 logPrecision = ACCURATE;
2518                                 expPrecision = ACCURATE;
2519                                 rcpPrecision = ACCURATE;
2520                                 rsqPrecision = ACCURATE;
2521                                 break;
2522                         case 3:
2523                                 logPrecision = WHQL;
2524                                 expPrecision = WHQL;
2525                                 rcpPrecision = WHQL;
2526                                 rsqPrecision = WHQL;
2527                                 break;
2528                         case 4:
2529                                 logPrecision = IEEE;
2530                                 expPrecision = IEEE;
2531                                 rcpPrecision = IEEE;
2532                                 rsqPrecision = IEEE;
2533                                 break;
2534                         default:
2535                                 logPrecision = ACCURATE;
2536                                 expPrecision = ACCURATE;
2537                                 rcpPrecision = ACCURATE;
2538                                 rsqPrecision = ACCURATE;
2539                                 break;
2540                         }
2541
2542                         switch(configuration.transparencyAntialiasing)
2543                         {
2544                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2545                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2546                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2547                         }
2548
2549                         switch(configuration.threadCount)
2550                         {
2551                         case -1: threadCount = CPUID::coreCount();        break;
2552                         case 0:  threadCount = CPUID::processAffinity();  break;
2553                         default: threadCount = configuration.threadCount; break;
2554                         }
2555
2556                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2557                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2558                         CPUID::setEnableSSE3(configuration.enableSSE3);
2559                         CPUID::setEnableSSE2(configuration.enableSSE2);
2560                         CPUID::setEnableSSE(configuration.enableSSE);
2561
2562                         for(int pass = 0; pass < 10; pass++)
2563                         {
2564                                 optimization[pass] = configuration.optimization[pass];
2565                         }
2566
2567                         forceWindowed = configuration.forceWindowed;
2568                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2569                         postBlendSRGB = configuration.postBlendSRGB;
2570                         exactColorRounding = configuration.exactColorRounding;
2571                         forceClearRegisters = configuration.forceClearRegisters;
2572
2573                 #ifndef NDEBUG
2574                         minPrimitives = configuration.minPrimitives;
2575                         maxPrimitives = configuration.maxPrimitives;
2576                 #endif
2577                 }
2578
2579                 if(!initialUpdate && !worker[0])
2580                 {
2581                         initializeThreads();
2582                 }
2583         }
2584 }