OSDN Git Service

Support separate depth and stencil buffers.
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2012 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "Renderer.hpp"
13
14 #include "Clipper.hpp"
15 #include "Math.hpp"
16 #include "FrameBuffer.hpp"
17 #include "Timer.hpp"
18 #include "Surface.hpp"
19 #include "Half.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
24 #include "CPUID.hpp"
25 #include "Memory.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
28 #include "Debug.hpp"
29 #include "Reactor/Reactor.hpp"
30
31 #undef max
32
33 bool disableServer = true;   // Passed to the SwiftConfig constructor when a Renderer is created
34
35 #ifndef NDEBUG
36 unsigned int minPrimitives = 1;         // Debug builds only: Renderer::draw() returns early when count is below this
37 unsigned int maxPrimitives = 1 << 21;   // Debug builds only: Renderer::draw() returns early when count is above this
38 #endif
39
40 namespace sw
41 {
42         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
43         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
44         extern bool booleanFaceRegister;        // Like the other flags in this group, overwritten from Conventions by the Renderer constructor
45         extern bool fullPixelPositionRegister;
46         extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
47         extern bool secondaryColor;             // Specular lighting is applied after texturing
48         // Remaining settings are declared here and defined in another translation unit
49         extern bool forceWindowed;
50         extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near
51         extern bool postBlendSRGB;
52         extern bool exactColorRounding;         // Overwritten by the Renderer constructor's argument of the same name
53         extern TransparencyAntialiasing transparencyAntialiasing;
54         extern bool forceClearRegisters;
55
56         extern bool precacheVertex;
57         extern bool precacheSetup;
58         extern bool precachePixel;
59
60         int batchSize = 128;     // Renderer::draw() divides this by the multisample count to get the primitives per batch
61         int threadCount = 1;     // When greater than 1, draw() resumes worker 0 instead of running taskLoop(0) itself
62         int unitCount = 1;       // Bound of the primitive-unit loop in findAvailableTasks()
63         int clusterCount = 1;    // Bound of the per-cluster loops in draw() and findAvailableTasks()
64
65         TranscendentalPrecision logPrecision = ACCURATE;   // threadFunction() enables flush-to-zero and denormals-are-zero when this is below IEEE
66         TranscendentalPrecision expPrecision = ACCURATE;
67         TranscendentalPrecision rcpPrecision = ACCURATE;
68         TranscendentalPrecision rsqPrecision = ACCURATE;
69         bool perspectiveCorrection = true;
70
71         struct Parameters   // Argument bundle that Renderer::threadFunction() receives through its void pointer
72         {
73                 Renderer *renderer;   // Renderer whose threadLoop() is run
74                 int threadIndex;      // Index handed to threadLoop()
75         };
76
77         DrawCall::DrawCall()   // Creates an unused draw call: no queries, all shader constants marked dirty
78         {
79                 queries = 0;
80                 // Dirty counts: Renderer::draw() copies this many leading constants into 'data', then resets the count to 0
81                 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
82                 vsDirtyConstI = 16;
83                 vsDirtyConstB = 16;
84
85                 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
86                 psDirtyConstI = 16;
87                 psDirtyConstB = 16;
88                 // Renderer::draw() only picks draw calls whose references field equals -1
89                 references = -1;
90                 // Obtained from allocate(); released with deallocate() in the destructor
91                 data = (DrawData*)allocate(sizeof(DrawData));
92                 data->constants = &constants;
93         }
94
95         DrawCall::~DrawCall()   // Releases the query list (if any) and the DrawData block
96         {
97                 delete queries;   // Set to 0 by the constructor; Renderer::draw() may attach a std::list<Query*>
98
99                 deallocate(data);   // Matches the allocate() call in the constructor
100         }
101
102         Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
            // Constructs the renderer around 'context':
            // - copies the API conventions and the exactColorRounding argument into the sw:: globals,
            // - resets all per-thread, per-unit and per-cluster bookkeeping,
            // - pre-allocates DRAW_COUNT DrawCall objects,
            // - creates the SwiftConfig object and the 'sync' resource.
103         {
104                 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
105                 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
106                 sw::booleanFaceRegister = conventions.booleanFaceRegister;
107                 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
108                 sw::leadingVertexFirst = conventions.leadingVertexFirst;
109                 sw::secondaryColor = conventions.secondaryColor;
110                 sw::exactColorRounding = exactColorRounding;
111
112                 setRenderTarget(0, 0);
113                 clipper = new Clipper();   // Deleted in the destructor
114                 // Flag the matrices and clip planes as needing an update
115                 updateViewMatrix = true;
116                 updateBaseMatrix = true;
117                 updateProjectionMatrix = true;
118                 updateClipPlanes = true;
119
120                 #if PERF_HUD
121                         resetTimers();
122                 #endif
123                 // Null out the 16 per-thread slots
124                 for(int i = 0; i < 16; i++)
125                 {
126                         vertexTask[i] = 0;
127
128                         worker[i] = 0;
129                         resume[i] = 0;
130                         suspend[i] = 0;
131                 }
132
133                 threadsAwake = 0;
134                 resumeApp = new Event();   // draw() waits on this event when no DrawCall is free
135                 // Draw queue starts out empty
136                 currentDraw = 0;
137                 nextDraw = 0;
138
139                 qHead = 0;
140                 qSize = 0;
141
142                 for(int i = 0; i < 16; i++)
143                 {
144                         triangleBatch[i] = 0;
145                         primitiveBatch[i] = 0;
146                 }
147                 // Pre-allocate the DrawCall objects; draw() recycles them rather than allocating new ones
148                 for(int draw = 0; draw < DRAW_COUNT; draw++)
149                 {
150                         drawCall[draw] = new DrawCall();
151                         drawList[draw] = drawCall[draw];
152                 }
153
154                 for(int unit = 0; unit < 16; unit++)
155                 {
156                         primitiveProgress[unit].init();
157                 }
158
159                 for(int cluster = 0; cluster < 16; cluster++)
160                 {
161                         pixelProgress[cluster].init();
162                 }
163
164                 clipFlags = 0;
165                 // NOTE(review): the 'true' argument presumably forces the first configuration update - confirm against updateConfiguration()
166                 swiftConfig = new SwiftConfig(disableServer);
167                 updateConfiguration(true);
168                 // Resource that draw() locks with sw::PRIVATE for every recorded pass
169                 sync = new Resource(0);
170         }
171
172         Renderer::~Renderer()   // Tears down what the constructor created: sync resource, clipper, threads, draw calls, config
173         {
174                 sync->destruct();   // Released through Resource::destruct() rather than delete
175
176                 delete clipper;
177                 clipper = 0;
178                 // Threads are terminated before the event and the draw calls are deleted
179                 terminateThreads();
180                 delete resumeApp;
181
182                 for(int draw = 0; draw < DRAW_COUNT; draw++)
183                 {
184                         delete drawCall[draw];
185                 }
186
187                 delete swiftConfig;
188         }
189
190         void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)   // Forwards all arguments unchanged to blitter.clear()
191         {
192                 blitter.clear(pixel, format, dest, dRect, rgbaMask);
193         }
194
195         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)   // Forwards all arguments unchanged to blitter.blit()
196         {
197                 blitter.blit(source, sRect, dest, dRect, filter);
198         }
199
200         void Renderer::blit3D(Surface *source, Surface *dest)   // Forwards both surfaces unchanged to blitter.blit3D()
201         {
202                 blitter.blit3D(source, dest);
203         }
204
205         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
            // Records and schedules a draw of 'count' primitives of type 'drawType'.
            //   indexOffset - byte offset added to the locked index buffer pointer (used only when context->indexBuffer is set)
            //   update      - when true, the vertex/setup/pixel states and routines are refreshed before recording
            // One DrawCall is recorded per supersample pass whose sample mask is non-zero. For each pass the
            // function snapshots the state the workers need (routines, streams, textures, constants, viewport,
            // render targets, scissor) into the DrawCall and its DrawData, taking locks on the resources involved.
            // With threadCount > 1 worker 0 is resumed if no thread is awake; otherwise the calling thread
            // runs taskLoop(0) itself.
206         {
207                 #ifndef NDEBUG
208                         if(count < minPrimitives || count > maxPrimitives)   // Debug-only filter on the primitive count
209                         {
210                                 return;
211                         }
212                 #endif
213
214                 context->drawType = drawType;
215
216                 updateConfiguration();
217                 updateClipper();
218                 // ss passes are made below; ms is the multisample count used within each pass
219                 int ss = context->getSuperSampleCount();
220                 int ms = context->getMultiSampleCount();
221
222                 for(int q = 0; q < ss; q++)
223                 {
224                         unsigned int oldMultiSampleMask = context->multiSampleMask;
                            // Select the ms bits of the sample mask that belong to pass q.
                            // NOTE(review): the shift by (32 - ms) requires ms >= 1; a shift by 32 is undefined.
225                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
226                         // Passes with an empty sample mask record nothing
227                         if(!context->multiSampleMask)
228                         {
229                                 continue;
230                         }
231
232                         sync->lock(sw::PRIVATE);
233                         // NOTE(review): these locals have no initializer and are assigned only inside the if below. When update is false
                            // and the sample mask is unchanged, they are still dereferenced by the bind() calls further down.
234                         Routine *vertexRoutine;
235                         Routine *setupRoutine;
236                         Routine *pixelRoutine;
237
238                         if(update || oldMultiSampleMask != context->multiSampleMask)
239                         {
240                                 vertexState = VertexProcessor::update();
241                                 setupState = SetupProcessor::update();
242                                 pixelState = PixelProcessor::update();
243
244                                 vertexRoutine = VertexProcessor::routine(vertexState);
245                                 setupRoutine = SetupProcessor::routine(setupState);
246                                 pixelRoutine = PixelProcessor::routine(pixelState);
247                         }
248                         // Primitives per batch; wireframe and vertex fill modes override this with 1 below
249                         int batch = batchSize / ms;
250                         // Primitive setup function chosen from the draw type and fill mode
251                         int (*setupPrimitives)(Renderer *renderer, int batch, int count);
252
253                         if(context->isDrawTriangle())
254                         {
255                                 switch(context->fillMode)
256                                 {
257                                 case FILL_SOLID:
258                                         setupPrimitives = setupSolidTriangles;
259                                         break;
260                                 case FILL_WIREFRAME:
261                                         setupPrimitives = setupWireframeTriangle;
262                                         batch = 1;
263                                         break;
264                                 case FILL_VERTEX:
265                                         setupPrimitives = setupVertexTriangle;
266                                         batch = 1;
267                                         break;
268                                 default: ASSERT(false);   // NOTE(review): setupPrimitives is left unassigned on this path
269                                 }
270                         }
271                         else if(context->isDrawLine())
272                         {
273                                 setupPrimitives = setupLines;
274                         }
275                         else   // Point draw
276                         {
277                                 setupPrimitives = setupPoints;
278                         }
279                         // Find a DrawCall whose references field is -1; wait on resumeApp and rescan until one is found
280                         DrawCall *draw = 0;
281
282                         do
283                         {
284                                 for(int i = 0; i < DRAW_COUNT; i++)
285                                 {
286                                         if(drawCall[i]->references == -1)
287                                         {
288                                                 draw = drawCall[i];
289                                                 drawList[nextDraw % DRAW_COUNT] = draw;
290
291                                                 break;
292                                         }
293                                 }
294
295                                 if(!draw)
296                                 {
297                                         resumeApp->wait();
298                                 }
299                         }
300                         while(!draw);
301
302                         DrawData *data = draw->data;
303                         // Attach the active queries, taking a reference on each one that is attached
304                         if(queries.size() != 0)
305                         {
306                                 draw->queries = new std::list<Query*>();
307                                 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
308                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
309                                 {
310                                         Query* q = *query;   // Shadows the pass index q inside this loop only
311                                         if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
312                                         {
313                                                 atomicIncrement(&(q->reference));
314                                                 draw->queries->push_back(q);
315                                         }
316                                 }
317                         }
318
319                         draw->drawType = drawType;
320                         draw->batchSize = batch;
321                         // Bind the routines and store them and their entry points in the draw call
322                         vertexRoutine->bind();
323                         setupRoutine->bind();
324                         pixelRoutine->bind();
325
326                         draw->vertexRoutine = vertexRoutine;
327                         draw->setupRoutine = setupRoutine;
328                         draw->pixelRoutine = pixelRoutine;
329                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
330                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
331                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
332                         draw->setupPrimitives = setupPrimitives;
333                         draw->setupState = setupState;
334                         // Record every vertex attribute's buffer pointer and stride; lock the streams that have a resource
335                         for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
336                         {
337                                 draw->vertexStream[i] = context->input[i].resource;
338                                 data->input[i] = context->input[i].buffer;
339                                 data->stride[i] = context->input[i].stride;
340
341                                 if(draw->vertexStream[i])
342                                 {
343                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
344                                 }
345                         }
346                         // indexOffset is applied as a byte offset to the locked index data
347                         if(context->indexBuffer)
348                         {
349                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
350                         }
351
352                         draw->indexBuffer = context->indexBuffer;
353                         // Clear all texture slots, then fill and lock the ones the pixel state samples from
354                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
355                         {
356                                 draw->texture[sampler] = 0;
357                         }
358
359                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
360                         {
361                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
362                                 {
363                                         draw->texture[sampler] = context->texture[sampler];
364                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texture is both read and written, use the same read/write lock as render targets
365
366                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
367                                 }
368                         }
369                         // Pixel shader constants: copy the dirty float/int/bool ranges and reset the dirty counts
370                         if(context->pixelShader)
371                         {
372                                 if(draw->psDirtyConstF)
373                                 {
374                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));   // cW copy is capped at 8 entries
375                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
376                                         draw->psDirtyConstF = 0;
377                                 }
378
379                                 if(draw->psDirtyConstI)
380                                 {
381                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
382                                         draw->psDirtyConstI = 0;
383                                 }
384
385                                 if(draw->psDirtyConstB)
386                                 {
387                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
388                                         draw->psDirtyConstB = 0;
389                                 }
390
391                                 PixelProcessor::lockUniformBuffers(data->ps.u);
392                         }
393                         // Pixel shader version <= 0x0104: copy texture stage uniforms, stopping at the first disabled stage unless a pixel shader is set
394                         if(context->pixelShaderVersion() <= 0x0104)
395                         {
396                                 for(int stage = 0; stage < 8; stage++)
397                                 {
398                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
399                                         {
400                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
401                                         }
402                                         else break;
403                                 }
404                         }
405                         // Vertex shader path: vertex textures (version >= 0x0300), dirty constants, instance ID, uniform buffers
406                         if(context->vertexShader)
407                         {
408                                 if(context->vertexShader->getVersion() >= 0x0300)
409                                 {
410                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
411                                         {
412                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
413                                                 {
414                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];   // Vertex textures are stored after the pixel texture slots
415                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
416
417                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
418                                                 }
419                                         }
420                                 }
421
422                                 if(draw->vsDirtyConstF)
423                                 {
424                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
425                                         draw->vsDirtyConstF = 0;
426                                 }
427
428                                 if(draw->vsDirtyConstI)
429                                 {
430                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
431                                         draw->vsDirtyConstI = 0;
432                                 }
433
434                                 if(draw->vsDirtyConstB)
435                                 {
436                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
437                                         draw->vsDirtyConstB = 0;
438                                 }
439
440                                 if(context->vertexShader->instanceIdDeclared)
441                                 {
442                                         data->instanceID = context->instanceID;
443                                 }
444
445                                 VertexProcessor::lockUniformBuffers(data->vs.u);
446                         }
447                         else   // No vertex shader: copy the 'ff' state and mark all vertex shader constants dirty again
448                         {
449                                 data->ff = ff;
450
451                                 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
452                                 draw->vsDirtyConstI = 16;
453                                 draw->vsDirtyConstB = 16;
454                         }
455                         // Stencil, fog and point state are copied only when the corresponding state flag is set
456                         if(pixelState.stencilActive)
457                         {
458                                 data->stencil[0] = stencil;
459                                 data->stencil[1] = stencilCCW;
460                         }
461
462                         if(pixelState.fogActive)
463                         {
464                                 data->fog = fog;
465                         }
466
467                         if(setupState.isDrawPoint)
468                         {
469                                 data->point = point;
470                         }
471
472                         data->lineWidth = context->lineWidth;
473
474                         data->factor = factor;
475                         // Alpha-to-coverage thresholds, spread around the alpha reference; only 2 and 4 samples are handled
476                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
477                         {
478                                 float ref = context->alphaReference * (1.0f / 255.0f);
479                                 float margin = sw::min(ref, 1.0f - ref);
480
481                                 if(ms == 4)
482                                 {
483                                         data->a2c0 = replicate(ref - margin * 0.6f);
484                                         data->a2c1 = replicate(ref - margin * 0.2f);
485                                         data->a2c2 = replicate(ref + margin * 0.2f);
486                                         data->a2c3 = replicate(ref + margin * 0.6f);
487                                 }
488                                 else if(ms == 2)
489                                 {
490                                         data->a2c0 = replicate(ref - margin * 0.3f);
491                                         data->a2c1 = replicate(ref + margin * 0.3f);
492                                 }
493                                 else ASSERT(false);
494                         }
495                         // Reset the per-cluster occlusion counters
496                         if(pixelState.occlusionEnabled)
497                         {
498                                 for(int cluster = 0; cluster < clusterCount; cluster++)
499                                 {
500                                         data->occlusion[cluster] = 0;
501                                 }
502                         }
503
504                         #if PERF_PROFILE
505                                 for(int cluster = 0; cluster < clusterCount; cluster++)
506                                 {
507                                         for(int i = 0; i < PERF_TIMERS; i++)
508                                         {
509                                                 data->cycles[i][cluster] = 0;
510                                         }
511                                 }
512                         #endif
513
514                         // Viewport
515                         {
516                                 float W = 0.5f * viewport.width;    // Half extents
517                                 float H = 0.5f * viewport.height;
518                                 float X0 = viewport.x0 + W;         // Viewport center
519                                 float Y0 = viewport.y0 + H;
520                                 float N = viewport.minZ;
521                                 float F = viewport.maxZ;
522                                 float Z = F - N;                    // Depth range
523                                 // Depth bias is added to the near value when isDrawTriangle(false) holds
524                                 if(context->isDrawTriangle(false))
525                                 {
526                                         N += depthBias;
527                                 }
528                                 // Complementary depth buffer: negate the range and use 1 - near
529                                 if(complementaryDepthBuffer)
530                                 {
531                                         Z = -Z;
532                                         N = 1 - N;
533                                 }
534
535                                 static const float X[5][16] =   // Fragment offsets
536                                 {
537                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
538                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
539                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
540                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
541                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
542                                 };
543
544                                 static const float Y[5][16] =   // Fragment offsets
545                                 {
546                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
547                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
548                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
549                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
550                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
551                                 };
552                                 // Row of the offset tables: log2 of the supersample count; the column is the pass index q
553                                 int s = sw::log2(ss);
554
555                                 data->Wx16 = replicate(W * 16);
556                                 data->Hx16 = replicate(H * 16);
557                                 data->X0x16 = replicate(X0 * 16 - 8);
558                                 data->Y0x16 = replicate(Y0 * 16 - 8);
559                                 data->XXXX = replicate(X[s][q] / W);
560                                 data->YYYY = replicate(Y[s][q] / H);
561                                 data->halfPixelX = replicate(0.5f / W);
562                                 data->halfPixelY = replicate(0.5f / H);
563                                 data->viewportHeight = abs(viewport.height);
564                                 data->slopeDepthBias = slopeDepthBias;
565                                 data->depthRange = Z;
566                                 data->depthNear = N;
567                                 draw->clipFlags = clipFlags;
568                                 // Copy only the user clip planes whose flag bit is set
569                                 if(clipFlags)
570                                 {
571                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
572                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
573                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
574                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
575                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
576                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
577                                 }
578                         }
579
580                         // Target
581                         {
582                                 for(int index = 0; index < RENDERTARGETS; index++)
583                                 {
584                                         draw->renderTarget[index] = context->renderTarget[index];
585                                         // Each pass locks the render target at slice q * ms
586                                         if(draw->renderTarget[index])
587                                         {
588                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
589                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
590                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
591                                         }
592                                 }
593                                 // Depth and stencil are taken from separate context members and locked independently
594                                 draw->depthBuffer = context->depthBuffer;
595                                 draw->stencilBuffer = context->stencilBuffer;
596
597                                 if(draw->depthBuffer)
598                                 {
599                                         data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
600                                         data->depthPitchB = context->depthBuffer->getInternalPitchB();
601                                         data->depthSliceB = context->depthBuffer->getInternalSliceB();
602                                 }
603
604                                 if(draw->stencilBuffer)
605                                 {
606                                         data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(q * ms, MANAGED);
607                                         data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
608                                         data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
609                                 }
610                         }
611
612                         // Scissor
613                         {
614                                 data->scissorX0 = scissor.x0;
615                                 data->scissorX1 = scissor.x1;
616                                 data->scissorY0 = scissor.y0;
617                                 data->scissorY1 = scissor.y1;
618                         }
619
620                         draw->primitive = 0;
621                         draw->count = count;
622                         // Number of batches needed to cover 'count' primitives, rounded up
623                         draw->references = (count + batch - 1) / batch;
624                         // nextDraw is advanced under schedulerMutex
625                         schedulerMutex.lock();
626                         nextDraw++;
627                         schedulerMutex.unlock();
628
629                         if(threadCount > 1)
630                         {
631                                 if(!threadsAwake)
632                                 {
633                                         suspend[0]->wait();   // threadLoop() signals suspend[i] after taskLoop() returns
634
635                                         threadsAwake = 1;
636                                         task[0].type = Task::RESUME;
637
638                                         resume[0]->signal();   // threadLoop() waits on resume[i] before running taskLoop() again
639                                 }
640                         }
641                         else   // Use main thread for draw execution
642                         {
643                                 threadsAwake = 1;
644                                 task[0].type = Task::RESUME;
645
646                                 taskLoop(0);
647                         }
648                 }
649         }
650
651         void Renderer::threadFunction(void *parameters)
652         {
653                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
654                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
655
656                 if(logPrecision < IEEE)
657                 {
658                         CPUID::setFlushToZero(true);
659                         CPUID::setDenormalsAreZero(true);
660                 }
661
662                 renderer->threadLoop(threadIndex);
663         }
664
665         void Renderer::threadLoop(int threadIndex)
666         {
667                 while(!exitThreads)
668                 {
669                         taskLoop(threadIndex);
670
671                         suspend[threadIndex]->signal();
672                         resume[threadIndex]->wait();
673                 }
674         }
675
676         void Renderer::taskLoop(int threadIndex)
677         {
678                 while(task[threadIndex].type != Task::SUSPEND)
679                 {
680                         scheduleTask(threadIndex);
681                         executeTask(threadIndex);
682                 }
683         }
684
685         void Renderer::findAvailableTasks()
686         {
687                 // Find pixel tasks
688                 for(int cluster = 0; cluster < clusterCount; cluster++)
689                 {
690                         if(!pixelProgress[cluster].executing)
691                         {
692                                 for(int unit = 0; unit < unitCount; unit++)
693                                 {
694                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
695                                         {
696                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
697                                                 {
698                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
699                                                         {
700                                                                 Task &task = taskQueue[qHead];
701                                                                 task.type = Task::PIXELS;
702                                                                 task.primitiveUnit = unit;
703                                                                 task.pixelCluster = cluster;
704
705                                                                 pixelProgress[cluster].executing = true;
706
707                                                                 // Commit to the task queue
708                                                                 qHead = (qHead + 1) % 32;
709                                                                 qSize++;
710
711                                                                 break;
712                                                         }
713                                                 }
714                                         }
715                                 }
716                         }
717                 }
718         
719                 // Find primitive tasks
720                 if(currentDraw == nextDraw)
721                 {
722                         return;   // No more primitives to process
723                 }
724
725                 for(int unit = 0; unit < unitCount; unit++)
726                 {
727                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
728
729                         if(draw->primitive >= draw->count)
730                         {
731                                 currentDraw++;
732
733                                 if(currentDraw == nextDraw)
734                                 {
735                                         return;   // No more primitives to process
736                                 }
737
738                                 draw = drawList[currentDraw % DRAW_COUNT];
739                         }
740
741                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
742                         {
743                                 int primitive = draw->primitive;
744                                 int count = draw->count;
745                                 int batch = draw->batchSize;
746
747                                 primitiveProgress[unit].drawCall = currentDraw;
748                                 primitiveProgress[unit].firstPrimitive = primitive;
749                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
750
751                                 draw->primitive += batch;
752
753                                 Task &task = taskQueue[qHead];
754                                 task.type = Task::PRIMITIVES;
755                                 task.primitiveUnit = unit;
756
757                                 primitiveProgress[unit].references = -1;
758
759                                 // Commit to the task queue
760                                 qHead = (qHead + 1) % 32;
761                                 qSize++;
762                         }
763                 }
764         }
765
766         void Renderer::scheduleTask(int threadIndex)
767         {
768                 schedulerMutex.lock();
769
770                 if((int)qSize < threadCount - threadsAwake + 1)
771                 {
772                         findAvailableTasks();
773                 }
774
775                 if(qSize != 0)
776                 {
777                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
778                         qSize--;
779
780                         if(threadsAwake != threadCount)
781                         {
782                                 int wakeup = qSize - threadsAwake + 1;
783
784                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
785                                 {
786                                         if(task[i].type == Task::SUSPEND)
787                                         {
788                                                 suspend[i]->wait();
789                                                 task[i].type = Task::RESUME;
790                                                 resume[i]->signal();
791
792                                                 threadsAwake++;
793                                                 wakeup--;
794                                         }
795                                 }
796                         }
797                 }
798                 else
799                 {
800                         task[threadIndex].type = Task::SUSPEND;
801
802                         threadsAwake--;
803                 }
804
805                 schedulerMutex.unlock();
806         }
807
808         void Renderer::executeTask(int threadIndex)
809         {
810                 #if PERF_HUD
811                         int64_t startTick = Timer::ticks();
812                 #endif
813
814                 switch(task[threadIndex].type)
815                 {
816                 case Task::PRIMITIVES:
817                         {
818                                 int unit = task[threadIndex].primitiveUnit;
819                                 
820                                 int input = primitiveProgress[unit].firstPrimitive;
821                                 int count = primitiveProgress[unit].primitiveCount;
822                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
823                                 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
824
825                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
826
827                                 #if PERF_HUD
828                                         int64_t time = Timer::ticks();
829                                         vertexTime[threadIndex] += time - startTick;
830                                         startTick = time;
831                                 #endif
832
833                                 int visible = draw->setupState.rasterizerDiscard ? 0 : setupPrimitives(this, unit, count);
834
835                                 primitiveProgress[unit].visible = visible;
836                                 primitiveProgress[unit].references = clusterCount;
837
838                                 #if PERF_HUD
839                                         setupTime[threadIndex] += Timer::ticks() - startTick;
840                                 #endif
841                         }
842                         break;
843                 case Task::PIXELS:
844                         {
845                                 int unit = task[threadIndex].primitiveUnit;
846                                 int visible = primitiveProgress[unit].visible;
847
848                                 if(visible > 0)
849                                 {
850                                         int cluster = task[threadIndex].pixelCluster;
851                                         Primitive *primitive = primitiveBatch[unit];
852                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
853                                         DrawData *data = draw->data;
854                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
855
856                                         pixelRoutine(primitive, visible, cluster, data);
857                                 }
858
859                                 finishRendering(task[threadIndex]);
860
861                                 #if PERF_HUD
862                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
863                                 #endif
864                         }
865                         break;
866                 case Task::RESUME:
867                         break;
868                 case Task::SUSPEND:
869                         break;
870                 default:
871                         ASSERT(false);
872                 }
873         }
874
875         void Renderer::synchronize()
876         {
877                 sync->lock(sw::PUBLIC);
878                 sync->unlock();
879         }
880
881         void Renderer::finishRendering(Task &pixelTask)
882         {
883                 int unit = pixelTask.primitiveUnit;
884                 int cluster = pixelTask.pixelCluster;
885
886                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
887                 DrawData &data = *draw.data;
888                 int primitive = primitiveProgress[unit].firstPrimitive;
889                 int count = primitiveProgress[unit].primitiveCount;
890
891                 pixelProgress[cluster].processedPrimitives = primitive + count;
892
893                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
894                 {
895                         pixelProgress[cluster].drawCall++;
896                         pixelProgress[cluster].processedPrimitives = 0;
897                 }
898
899                 int ref = atomicDecrement(&primitiveProgress[unit].references);
900
901                 if(ref == 0)
902                 {
903                         ref = atomicDecrement(&draw.references);
904
905                         if(ref == 0)
906                         {
907                                 #if PERF_PROFILE
908                                         for(int cluster = 0; cluster < clusterCount; cluster++)
909                                         {
910                                                 for(int i = 0; i < PERF_TIMERS; i++)
911                                                 {
912                                                         profiler.cycles[i] += data.cycles[i][cluster];
913                                                 }
914                                         }
915                                 #endif
916
917                                 if(draw.queries)
918                                 {
919                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
920                                         {
921                                                 Query *query = *q;
922
923                                                 switch(query->type)
924                                                 {
925                                                 case Query::FRAGMENTS_PASSED:
926                                                         for(int cluster = 0; cluster < clusterCount; cluster++)
927                                                         {
928                                                                 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
929                                                         }
930                                                         break;
931                                                 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
932                                                         atomicAdd((volatile int*)&query->data, pixelProgress[cluster].processedPrimitives);
933                                                         break;
934                                                 default:
935                                                         break;
936                                                 }
937
938                                                 atomicDecrement(&query->reference);
939                                         }
940
941                                         delete draw.queries;
942                                         draw.queries = 0;
943                                 }
944
945                                 for(int i = 0; i < RENDERTARGETS; i++)
946                                 {
947                                         if(draw.renderTarget[i])
948                                         {
949                                                 draw.renderTarget[i]->unlockInternal();
950                                         }
951                                 }
952
953                                 if(draw.depthBuffer)
954                                 {
955                                         draw.depthBuffer->unlockInternal();
956                                 }
957
958                                 if(draw.stencilBuffer)
959                                 {
960                                         draw.stencilBuffer->unlockStencil();
961                                 }
962
963                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
964                                 {
965                                         if(draw.texture[i])
966                                         {
967                                                 draw.texture[i]->unlock();
968                                         }
969                                 }
970
971                                 for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
972                                 {
973                                         if(draw.vertexStream[i])
974                                         {
975                                                 draw.vertexStream[i]->unlock();
976                                         }
977                                 }
978
979                                 if(draw.indexBuffer)
980                                 {
981                                         draw.indexBuffer->unlock();
982                                 }
983
984                                 PixelProcessor::unlockUniformBuffers();
985                                 VertexProcessor::unlockUniformBuffers();
986
987                                 draw.vertexRoutine->unbind();
988                                 draw.setupRoutine->unbind();
989                                 draw.pixelRoutine->unbind();
990
991                                 sync->unlock();
992
993                                 draw.references = -1;
994                                 resumeApp->signal();
995                         }
996                 }
997
998                 pixelProgress[cluster].executing = false;
999         }
1000
1001         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1002         {
1003                 Triangle *triangle = triangleBatch[unit];
1004                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1005                 DrawData *data = draw->data;
1006                 VertexTask *task = vertexTask[thread];
1007
1008                 const void *indices = data->indices;
1009                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1010
1011                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1012                 {
1013                         task->vertexCache.clear();
1014                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1015                 }
1016
1017                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1018
1019                 switch(draw->drawType)
1020                 {
1021                 case DRAW_POINTLIST:
1022                         {
1023                                 unsigned int index = start;
1024
1025                                 for(unsigned int i = 0; i < triangleCount; i++)
1026                                 {
1027                                         batch[i][0] = index;
1028                                         batch[i][1] = index;
1029                                         batch[i][2] = index;
1030
1031                                         index += 1;
1032                                 }
1033                         }
1034                         break;
1035                 case DRAW_LINELIST:
1036                         {
1037                                 unsigned int index = 2 * start;
1038
1039                                 for(unsigned int i = 0; i < triangleCount; i++)
1040                                 {
1041                                         batch[i][0] = index + 0;
1042                                         batch[i][1] = index + 1;
1043                                         batch[i][2] = index + 1;
1044
1045                                         index += 2;
1046                                 }
1047                         }
1048                         break;
1049                 case DRAW_LINESTRIP:
1050                         {
1051                                 unsigned int index = start;
1052
1053                                 for(unsigned int i = 0; i < triangleCount; i++)
1054                                 {
1055                                         batch[i][0] = index + 0;
1056                                         batch[i][1] = index + 1;
1057                                         batch[i][2] = index + 1;
1058
1059                                         index += 1;
1060                                 }
1061                         }
1062                         break;
1063                 case DRAW_LINELOOP:
1064                         {
1065                                 unsigned int index = start;
1066
1067                                 for(unsigned int i = 0; i < triangleCount; i++)
1068                                 {
1069                                         batch[i][0] = (index + 0) % loop;
1070                                         batch[i][1] = (index + 1) % loop;
1071                                         batch[i][2] = (index + 1) % loop;
1072
1073                                         index += 1;
1074                                 }
1075                         }
1076                         break;
1077                 case DRAW_TRIANGLELIST:
1078                         {
1079                                 unsigned int index = 3 * start;
1080
1081                                 for(unsigned int i = 0; i < triangleCount; i++)
1082                                 {
1083                                         batch[i][0] = index + 0;
1084                                         batch[i][1] = index + 1;
1085                                         batch[i][2] = index + 2;
1086
1087                                         index += 3;
1088                                 }
1089                         }
1090                         break;
1091                 case DRAW_TRIANGLESTRIP:
1092                         {
1093                                 unsigned int index = start;
1094
1095                                 for(unsigned int i = 0; i < triangleCount; i++)
1096                                 {
1097                                         batch[i][0] = index + 0;
1098                                         batch[i][1] = index + (index & 1) + 1;
1099                                         batch[i][2] = index + (~index & 1) + 1;
1100
1101                                         index += 1;
1102                                 }
1103                         }
1104                         break;
1105                 case DRAW_TRIANGLEFAN:
1106                         {
1107                                 unsigned int index = start;
1108
1109                                 for(unsigned int i = 0; i < triangleCount; i++)
1110                                 {
1111                                         batch[i][0] = index + 1;
1112                                         batch[i][1] = index + 2;
1113                                         batch[i][2] = 0;
1114
1115                                         index += 1;
1116                                 }
1117                         }
1118                         break;
1119                 case DRAW_INDEXEDPOINTLIST8:
1120                         {
1121                                 const unsigned char *index = (const unsigned char*)indices + start;
1122
1123                                 for(unsigned int i = 0; i < triangleCount; i++)
1124                                 {
1125                                         batch[i][0] = *index;
1126                                         batch[i][1] = *index;
1127                                         batch[i][2] = *index;
1128
1129                                         index += 1;
1130                                 }
1131                         }
1132                         break;
1133                 case DRAW_INDEXEDPOINTLIST16:
1134                         {
1135                                 const unsigned short *index = (const unsigned short*)indices + start;
1136
1137                                 for(unsigned int i = 0; i < triangleCount; i++)
1138                                 {
1139                                         batch[i][0] = *index;
1140                                         batch[i][1] = *index;
1141                                         batch[i][2] = *index;
1142
1143                                         index += 1;
1144                                 }
1145                         }
1146                         break;
1147                 case DRAW_INDEXEDPOINTLIST32:
1148                         {
1149                                 const unsigned int *index = (const unsigned int*)indices + start;
1150
1151                                 for(unsigned int i = 0; i < triangleCount; i++)
1152                                 {
1153                                         batch[i][0] = *index;
1154                                         batch[i][1] = *index;
1155                                         batch[i][2] = *index;
1156
1157                                         index += 1;
1158                                 }
1159                         }
1160                         break;
1161                 case DRAW_INDEXEDLINELIST8:
1162                         {
1163                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1164
1165                                 for(unsigned int i = 0; i < triangleCount; i++)
1166                                 {
1167                                         batch[i][0] = index[0];
1168                                         batch[i][1] = index[1];
1169                                         batch[i][2] = index[1];
1170
1171                                         index += 2;
1172                                 }
1173                         }
1174                         break;
1175                 case DRAW_INDEXEDLINELIST16:
1176                         {
1177                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1178
1179                                 for(unsigned int i = 0; i < triangleCount; i++)
1180                                 {
1181                                         batch[i][0] = index[0];
1182                                         batch[i][1] = index[1];
1183                                         batch[i][2] = index[1];
1184
1185                                         index += 2;
1186                                 }
1187                         }
1188                         break;
1189                 case DRAW_INDEXEDLINELIST32:
1190                         {
1191                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1192
1193                                 for(unsigned int i = 0; i < triangleCount; i++)
1194                                 {
1195                                         batch[i][0] = index[0];
1196                                         batch[i][1] = index[1];
1197                                         batch[i][2] = index[1];
1198
1199                                         index += 2;
1200                                 }
1201                         }
1202                         break;
1203                 case DRAW_INDEXEDLINESTRIP8:
1204                         {
1205                                 const unsigned char *index = (const unsigned char*)indices + start;
1206
1207                                 for(unsigned int i = 0; i < triangleCount; i++)
1208                                 {
1209                                         batch[i][0] = index[0];
1210                                         batch[i][1] = index[1];
1211                                         batch[i][2] = index[1];
1212
1213                                         index += 1;
1214                                 }
1215                         }
1216                         break;
1217                 case DRAW_INDEXEDLINESTRIP16:
1218                         {
1219                                 const unsigned short *index = (const unsigned short*)indices + start;
1220
1221                                 for(unsigned int i = 0; i < triangleCount; i++)
1222                                 {
1223                                         batch[i][0] = index[0];
1224                                         batch[i][1] = index[1];
1225                                         batch[i][2] = index[1];
1226
1227                                         index += 1;
1228                                 }
1229                         }
1230                         break;
1231                 case DRAW_INDEXEDLINESTRIP32:
1232                         {
1233                                 const unsigned int *index = (const unsigned int*)indices + start;
1234
1235                                 for(unsigned int i = 0; i < triangleCount; i++)
1236                                 {
1237                                         batch[i][0] = index[0];
1238                                         batch[i][1] = index[1];
1239                                         batch[i][2] = index[1];
1240
1241                                         index += 1;
1242                                 }
1243                         }
1244                         break;
1245                 case DRAW_INDEXEDLINELOOP8:
1246                         {
1247                                 const unsigned char *index = (const unsigned char*)indices;
1248
1249                                 for(unsigned int i = 0; i < triangleCount; i++)
1250                                 {
1251                                         batch[i][0] = index[(start + i + 0) % loop];
1252                                         batch[i][1] = index[(start + i + 1) % loop];
1253                                         batch[i][2] = index[(start + i + 1) % loop];
1254                                 }
1255                         }
1256                         break;
1257                 case DRAW_INDEXEDLINELOOP16:
1258                         {
1259                                 const unsigned short *index = (const unsigned short*)indices;
1260
1261                                 for(unsigned int i = 0; i < triangleCount; i++)
1262                                 {
1263                                         batch[i][0] = index[(start + i + 0) % loop];
1264                                         batch[i][1] = index[(start + i + 1) % loop];
1265                                         batch[i][2] = index[(start + i + 1) % loop];
1266                                 }
1267                         }
1268                         break;
1269                 case DRAW_INDEXEDLINELOOP32:
1270                         {
1271                                 const unsigned int *index = (const unsigned int*)indices;
1272
1273                                 for(unsigned int i = 0; i < triangleCount; i++)
1274                                 {
1275                                         batch[i][0] = index[(start + i + 0) % loop];
1276                                         batch[i][1] = index[(start + i + 1) % loop];
1277                                         batch[i][2] = index[(start + i + 1) % loop];
1278                                 }
1279                         }
1280                         break;
1281                 case DRAW_INDEXEDTRIANGLELIST8:
1282                         {
1283                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1284
1285                                 for(unsigned int i = 0; i < triangleCount; i++)
1286                                 {
1287                                         batch[i][0] = index[0];
1288                                         batch[i][1] = index[1];
1289                                         batch[i][2] = index[2];
1290
1291                                         index += 3;
1292                                 }
1293                         }
1294                         break;
1295                 case DRAW_INDEXEDTRIANGLELIST16:
1296                         {
1297                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1298
1299                                 for(unsigned int i = 0; i < triangleCount; i++)
1300                                 {
1301                                         batch[i][0] = index[0];
1302                                         batch[i][1] = index[1];
1303                                         batch[i][2] = index[2];
1304
1305                                         index += 3;
1306                                 }
1307                         }
1308                         break;
1309                 case DRAW_INDEXEDTRIANGLELIST32:
1310                         {
1311                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1312
1313                                 for(unsigned int i = 0; i < triangleCount; i++)
1314                                 {
1315                                         batch[i][0] = index[0];
1316                                         batch[i][1] = index[1];
1317                                         batch[i][2] = index[2];
1318
1319                                         index += 3;
1320                                 }
1321                         }
1322                         break;
1323                 case DRAW_INDEXEDTRIANGLESTRIP8:
1324                         {
1325                                 const unsigned char *index = (const unsigned char*)indices + start;
1326
1327                                 for(unsigned int i = 0; i < triangleCount; i++)
1328                                 {
1329                                         batch[i][0] = index[0];
1330                                         batch[i][1] = index[((start + i) & 1) + 1];
1331                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1332
1333                                         index += 1;
1334                                 }
1335                         }
1336                         break;
1337                 case DRAW_INDEXEDTRIANGLESTRIP16:
1338                         {
1339                                 const unsigned short *index = (const unsigned short*)indices + start;
1340
1341                                 for(unsigned int i = 0; i < triangleCount; i++)
1342                                 {
1343                                         batch[i][0] = index[0];
1344                                         batch[i][1] = index[((start + i) & 1) + 1];
1345                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1346
1347                                         index += 1;
1348                                 }
1349                         }
1350                         break;
1351                 case DRAW_INDEXEDTRIANGLESTRIP32:
1352                         {
1353                                 const unsigned int *index = (const unsigned int*)indices + start;
1354
1355                                 for(unsigned int i = 0; i < triangleCount; i++)
1356                                 {
1357                                         batch[i][0] = index[0];
1358                                         batch[i][1] = index[((start + i) & 1) + 1];
1359                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1360
1361                                         index += 1;
1362                                 }
1363                         }
1364                         break;
1365                 case DRAW_INDEXEDTRIANGLEFAN8:
1366                         {
1367                                 const unsigned char *index = (const unsigned char*)indices;
1368
1369                                 for(unsigned int i = 0; i < triangleCount; i++)
1370                                 {
1371                                         batch[i][0] = index[start + i + 1];
1372                                         batch[i][1] = index[start + i + 2];
1373                                         batch[i][2] = index[0];
1374                                 }
1375                         }
1376                         break;
1377                 case DRAW_INDEXEDTRIANGLEFAN16:
1378                         {
1379                                 const unsigned short *index = (const unsigned short*)indices;
1380
1381                                 for(unsigned int i = 0; i < triangleCount; i++)
1382                                 {
1383                                         batch[i][0] = index[start + i + 1];
1384                                         batch[i][1] = index[start + i + 2];
1385                                         batch[i][2] = index[0];
1386                                 }
1387                         }
1388                         break;
1389                 case DRAW_INDEXEDTRIANGLEFAN32:
1390                         {
1391                                 const unsigned int *index = (const unsigned int*)indices;
1392
1393                                 for(unsigned int i = 0; i < triangleCount; i++)
1394                                 {
1395                                         batch[i][0] = index[start + i + 1];
1396                                         batch[i][1] = index[start + i + 2];
1397                                         batch[i][2] = index[0];
1398                                 }
1399                         }
1400                         break;
1401         case DRAW_QUADLIST:
1402                         {
1403                                 unsigned int index = 4 * start / 2;
1404
1405                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1406                                 {
1407                                         batch[i+0][0] = index + 0;
1408                                         batch[i+0][1] = index + 1;
1409                                         batch[i+0][2] = index + 2;
1410
1411                     batch[i+1][0] = index + 0;
1412                                         batch[i+1][1] = index + 2;
1413                                         batch[i+1][2] = index + 3;
1414
1415                                         index += 4;
1416                                 }
1417                         }
1418                         break;
1419                 default:
1420                         ASSERT(false);
1421                         return;
1422                 }
1423
1424                 task->vertexCount = triangleCount * 3;
1425                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1426         }
1427
1428         int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1429         {
1430                 Triangle *triangle = renderer->triangleBatch[unit];
1431                 Primitive *primitive = renderer->primitiveBatch[unit];
1432
1433                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1434                 SetupProcessor::State &state = draw.setupState;
1435                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1436
1437                 int ms = state.multiSample;
1438                 int pos = state.positionRegister;
1439                 const DrawData *data = draw.data;
1440                 int visible = 0;
1441
1442                 for(int i = 0; i < count; i++, triangle++)
1443                 {
1444                         Vertex &v0 = triangle->v0;
1445                         Vertex &v1 = triangle->v1;
1446                         Vertex &v2 = triangle->v2;
1447
1448                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1449                         {
1450                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1451
1452                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1453
1454                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1455                                 {
1456                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1457                                         {
1458                                                 continue;
1459                                         }
1460                                 }
1461
1462                                 if(setupRoutine(primitive, triangle, &polygon, data))
1463                                 {
1464                                         primitive += ms;
1465                                         visible++;
1466                                 }
1467                         }
1468                 }
1469
1470                 return visible;
1471         }
1472
1473         int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1474         {
1475                 Triangle *triangle = renderer->triangleBatch[unit];
1476                 Primitive *primitive = renderer->primitiveBatch[unit];
1477                 int visible = 0;
1478
1479                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1480                 SetupProcessor::State &state = draw.setupState;
1481                 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1482
1483                 const Vertex &v0 = triangle[0].v0;
1484                 const Vertex &v1 = triangle[0].v1;
1485                 const Vertex &v2 = triangle[0].v2;
1486
1487                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1488
1489                 if(state.cullMode == CULL_CLOCKWISE)
1490                 {
1491                         if(d >= 0) return 0;
1492                 }
1493                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1494                 {
1495                         if(d <= 0) return 0;
1496                 }
1497
1498                 // Copy attributes
1499                 triangle[1].v0 = v1;
1500                 triangle[1].v1 = v2;
1501                 triangle[2].v0 = v2;
1502                 triangle[2].v1 = v0;
1503
1504                 if(state.color[0][0].flat)   // FIXME
1505                 {
1506                         for(int i = 0; i < 2; i++)
1507                         {
1508                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1509                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1510                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1511                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1512                         }
1513                 }
1514
1515                 for(int i = 0; i < 3; i++)
1516                 {
1517                         if(setupLine(renderer, *primitive, *triangle, draw))
1518                         {
1519                                 primitive->area = 0.5f * d;
1520
1521                                 primitive++;
1522                                 visible++;
1523                         }
1524
1525                         triangle++;
1526                 }
1527
1528                 return visible;
1529         }
1530         
1531         int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1532         {
1533                 Triangle *triangle = renderer->triangleBatch[unit];
1534                 Primitive *primitive = renderer->primitiveBatch[unit];
1535                 int visible = 0;
1536
1537                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1538                 SetupProcessor::State &state = draw.setupState;
1539
1540                 const Vertex &v0 = triangle[0].v0;
1541                 const Vertex &v1 = triangle[0].v1;
1542                 const Vertex &v2 = triangle[0].v2;
1543
1544                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1545
1546                 if(state.cullMode == CULL_CLOCKWISE)
1547                 {
1548                         if(d >= 0) return 0;
1549                 }
1550                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1551                 {
1552                         if(d <= 0) return 0;
1553                 }
1554
1555                 // Copy attributes
1556                 triangle[1].v0 = v1;
1557                 triangle[2].v0 = v2;
1558
1559                 for(int i = 0; i < 3; i++)
1560                 {
1561                         if(setupPoint(renderer, *primitive, *triangle, draw))
1562                         {
1563                                 primitive->area = 0.5f * d;
1564
1565                                 primitive++;
1566                                 visible++;
1567                         }
1568
1569                         triangle++;
1570                 }
1571
1572                 return visible;
1573         }
1574
1575         int Renderer::setupLines(Renderer *renderer, int unit, int count)
1576         {
1577                 Triangle *triangle = renderer->triangleBatch[unit];
1578                 Primitive *primitive = renderer->primitiveBatch[unit];
1579                 int visible = 0;
1580
1581                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1582                 SetupProcessor::State &state = draw.setupState;
1583
1584                 int ms = state.multiSample;
1585
1586                 for(int i = 0; i < count; i++)
1587                 {
1588                         if(setupLine(renderer, *primitive, *triangle, draw))
1589                         {
1590                                 primitive += ms;
1591                                 visible++;
1592                         }
1593
1594                         triangle++;
1595                 }
1596
1597                 return visible;
1598         }
1599
1600         int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1601         {
1602                 Triangle *triangle = renderer->triangleBatch[unit];
1603                 Primitive *primitive = renderer->primitiveBatch[unit];
1604                 int visible = 0;
1605
1606                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1607                 SetupProcessor::State &state = draw.setupState;
1608
1609                 int ms = state.multiSample;
1610
1611                 for(int i = 0; i < count; i++)
1612                 {
1613                         if(setupPoint(renderer, *primitive, *triangle, draw))
1614                         {
1615                                 primitive += ms;
1616                                 visible++;
1617                         }
1618
1619                         triangle++;
1620                 }
1621
1622                 return visible;
1623         }
1624
1625         bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1626         {
1627                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1628                 const SetupProcessor::State &state = draw.setupState;
1629                 const DrawData &data = *draw.data;
1630
1631                 float lineWidth = data.lineWidth;
1632
1633                 Vertex &v0 = triangle.v0;
1634                 Vertex &v1 = triangle.v1;
1635
1636                 int pos = state.positionRegister;
1637
1638                 const float4 &P0 = v0.v[pos];
1639                 const float4 &P1 = v1.v[pos];
1640
1641                 if(P0.w <= 0 && P1.w <= 0)
1642                 {
1643                         return false;
1644                 }
1645
1646                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1647                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1648
1649                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1650                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1651
1652                 if(dx == 0 && dy == 0)
1653                 {
1654                         return false;
1655                 }
1656
1657                 if(false)   // Rectangle
1658                 {
1659                         float4 P[4];
1660                         int C[4];
1661
1662                         P[0] = P0;
1663                         P[1] = P1;
1664                         P[2] = P1;
1665                         P[3] = P0;
1666
1667                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1668
1669                         dx *= scale;
1670                         dy *= scale;
1671
1672                         float dx0w = dx * P0.w / W;
1673                         float dy0h = dy * P0.w / H;
1674                         float dx0h = dx * P0.w / H;
1675                         float dy0w = dy * P0.w / W;
1676
1677                         float dx1w = dx * P1.w / W;
1678                         float dy1h = dy * P1.w / H;
1679                         float dx1h = dx * P1.w / H;
1680                         float dy1w = dy * P1.w / W;
1681
1682                         P[0].x += -dy0w + -dx0w;
1683                         P[0].y += -dx0h + +dy0h;
1684                         C[0] = computeClipFlags(P[0], data);
1685
1686                         P[1].x += -dy1w + +dx1w;
1687                         P[1].y += -dx1h + +dy1h;
1688                         C[1] = computeClipFlags(P[1], data);
1689
1690                         P[2].x += +dy1w + +dx1w;
1691                         P[2].y += +dx1h + -dy1h;
1692                         C[2] = computeClipFlags(P[2], data);
1693
1694                         P[3].x += +dy0w + -dx0w;
1695                         P[3].y += +dx0h + +dy0h;
1696                         C[3] = computeClipFlags(P[3], data);
1697
1698                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1699                         {
1700                                 Polygon polygon(P, 4);
1701
1702                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1703
1704                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1705                                 {
1706                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1707                                         {
1708                                                 return false;
1709                                         }
1710                                 }
1711
1712                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1713                         }
1714                 }
1715                 else   // Diamond test convention
1716                 {
1717                         float4 P[8];
1718                         int C[8];
1719
1720                         P[0] = P0;
1721                         P[1] = P0;
1722                         P[2] = P0;
1723                         P[3] = P0;
1724                         P[4] = P1;
1725                         P[5] = P1;
1726                         P[6] = P1;
1727                         P[7] = P1;
1728
1729                         float dx0 = lineWidth * 0.5f * P0.w / W;
1730                         float dy0 = lineWidth * 0.5f * P0.w / H;
1731
1732                         float dx1 = lineWidth * 0.5f * P1.w / W;
1733                         float dy1 = lineWidth * 0.5f * P1.w / H;
1734
1735                         P[0].x += -dx0;
1736                         C[0] = computeClipFlags(P[0], data);
1737
1738                         P[1].y += +dy0;
1739                         C[1] = computeClipFlags(P[1], data);
1740
1741                         P[2].x += +dx0;
1742                         C[2] = computeClipFlags(P[2], data);
1743
1744                         P[3].y += -dy0;
1745                         C[3] = computeClipFlags(P[3], data);
1746
1747                         P[4].x += -dx1;
1748                         C[4] = computeClipFlags(P[4], data);
1749
1750                         P[5].y += +dy1;
1751                         C[5] = computeClipFlags(P[5], data);
1752
1753                         P[6].x += +dx1;
1754                         C[6] = computeClipFlags(P[6], data);
1755
1756                         P[7].y += -dy1;
1757                         C[7] = computeClipFlags(P[7], data);
1758
1759                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1760                         {
1761                                 float4 L[6];
1762
1763                                 if(dx > -dy)
1764                                 {
1765                                         if(dx > dy)   // Right
1766                                         {
1767                                                 L[0] = P[0];
1768                                                 L[1] = P[1];
1769                                                 L[2] = P[5];
1770                                                 L[3] = P[6];
1771                                                 L[4] = P[7];
1772                                                 L[5] = P[3];
1773                                         }
1774                                         else   // Down
1775                                         {
1776                                                 L[0] = P[0];
1777                                                 L[1] = P[4];
1778                                                 L[2] = P[5];
1779                                                 L[3] = P[6];
1780                                                 L[4] = P[2];
1781                                                 L[5] = P[3];
1782                                         }
1783                                 }
1784                                 else
1785                                 {
1786                                         if(dx > dy)   // Up
1787                                         {
1788                                                 L[0] = P[0];
1789                                                 L[1] = P[1];
1790                                                 L[2] = P[2];
1791                                                 L[3] = P[6];
1792                                                 L[4] = P[7];
1793                                                 L[5] = P[4];
1794                                         }
1795                                         else   // Left
1796                                         {
1797                                                 L[0] = P[1];
1798                                                 L[1] = P[2];
1799                                                 L[2] = P[3];
1800                                                 L[3] = P[7];
1801                                                 L[4] = P[4];
1802                                                 L[5] = P[5];
1803                                         }
1804                                 }
1805
1806                                 Polygon polygon(L, 6);
1807
1808                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1809
1810                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1811                                 {
1812                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1813                                         {
1814                                                 return false;
1815                                         }
1816                                 }
1817
1818                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1819                         }
1820                 }
1821
1822                 return false;
1823         }
1824
1825         bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1826         {
1827                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1828                 const SetupProcessor::State &state = draw.setupState;
1829                 const DrawData &data = *draw.data;
1830
1831                 Vertex &v = triangle.v0;
1832
1833                 float pSize;
1834
1835                 int pts = state.pointSizeRegister;
1836
1837                 if(state.pointSizeRegister != 0xF)
1838                 {
1839                         pSize = v.v[pts].y;
1840                 }
1841                 else
1842                 {
1843                         pSize = data.point.pointSize[0];
1844                 }
1845
1846                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1847
1848                 float4 P[4];
1849                 int C[4];
1850
1851                 int pos = state.positionRegister;
1852
1853                 P[0] = v.v[pos];
1854                 P[1] = v.v[pos];
1855                 P[2] = v.v[pos];
1856                 P[3] = v.v[pos];
1857
1858                 const float X = pSize * P[0].w * data.halfPixelX[0];
1859                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1860
1861                 P[0].x -= X;
1862                 P[0].y += Y;
1863                 C[0] = computeClipFlags(P[0], data);
1864
1865                 P[1].x += X;
1866                 P[1].y += Y;
1867                 C[1] = computeClipFlags(P[1], data);
1868
1869                 P[2].x += X;
1870                 P[2].y -= Y;
1871                 C[2] = computeClipFlags(P[2], data);
1872
1873                 P[3].x -= X;
1874                 P[3].y -= Y;
1875                 C[3] = computeClipFlags(P[3], data);
1876
1877                 triangle.v1 = triangle.v0;
1878                 triangle.v2 = triangle.v0;
1879
1880                 triangle.v1.X += iround(16 * 0.5f * pSize);
1881                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1882
1883                 Polygon polygon(P, 4);
1884
1885                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1886                 {
1887                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1888
1889                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1890                         {
1891                                 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1892                                 {
1893                                         return false;
1894                                 }
1895                         }
1896                         
1897                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1898                 }
1899
1900                 return false;
1901         }
1902
1903         unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1904         {
1905                 return ((v.x > v.w)  << 0) |
1906                            ((v.y > v.w)  << 1) |
1907                            ((v.z > v.w)  << 2) |
1908                            ((v.x < -v.w) << 3) |
1909                        ((v.y < -v.w) << 4) |
1910                            ((v.z < 0)    << 5) |
1911                            Clipper::CLIP_FINITE;   // FIXME: xyz finite
1912         }
1913
1914         void Renderer::initializeThreads()
1915         {
1916                 unitCount = ceilPow2(threadCount);
1917                 clusterCount = ceilPow2(threadCount);
1918
1919                 for(int i = 0; i < unitCount; i++)
1920                 {
1921                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1922                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1923                 }
1924
1925                 for(int i = 0; i < threadCount; i++)
1926                 {
1927                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1928                         vertexTask[i]->vertexCache.drawCall = -1;
1929
1930                         task[i].type = Task::SUSPEND;
1931
1932                         resume[i] = new Event();
1933                         suspend[i] = new Event();
1934
1935                         Parameters parameters;
1936                         parameters.threadIndex = i;
1937                         parameters.renderer = this;
1938
1939                         exitThreads = false;
1940                         worker[i] = new Thread(threadFunction, &parameters);
1941
1942                         suspend[i]->wait();
1943                         suspend[i]->signal();
1944                 }
1945         }
1946
1947         void Renderer::terminateThreads()
1948         {
1949                 while(threadsAwake != 0)
1950                 {
1951                         Thread::sleep(1);
1952                 }
1953
1954                 for(int thread = 0; thread < threadCount; thread++)
1955                 {
1956                         if(worker[thread])
1957                         {
1958                                 exitThreads = true;
1959                                 resume[thread]->signal();
1960                                 worker[thread]->join();
1961                                 
1962                                 delete worker[thread];
1963                                 worker[thread] = 0;
1964                                 delete resume[thread];
1965                                 resume[thread] = 0;
1966                                 delete suspend[thread];
1967                                 suspend[thread] = 0;
1968                         }
1969                 
1970                         deallocate(vertexTask[thread]);
1971                         vertexTask[thread] = 0;
1972                 }
1973
1974                 for(int i = 0; i < 16; i++)
1975                 {
1976                         deallocate(triangleBatch[i]);
1977                         triangleBatch[i] = 0;
1978
1979                         deallocate(primitiveBatch[i]);
1980                         primitiveBatch[i] = 0;
1981                 }
1982         }
1983
1984         void Renderer::loadConstants(const VertexShader *vertexShader)
1985         {
1986                 if(!vertexShader) return;
1987
1988                 size_t count = vertexShader->getLength();
1989
1990                 for(size_t i = 0; i < count; i++)
1991                 {
1992                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
1993
1994                         if(instruction->opcode == Shader::OPCODE_DEF)
1995                         {
1996                                 int index = instruction->dst.index;
1997                                 float value[4];
1998
1999                                 value[0] = instruction->src[0].value[0];
2000                                 value[1] = instruction->src[0].value[1];
2001                                 value[2] = instruction->src[0].value[2];
2002                                 value[3] = instruction->src[0].value[3];
2003
2004                                 setVertexShaderConstantF(index, value);
2005                         }
2006                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2007                         {
2008                                 int index = instruction->dst.index;
2009                                 int integer[4];
2010
2011                                 integer[0] = instruction->src[0].integer[0];
2012                                 integer[1] = instruction->src[0].integer[1];
2013                                 integer[2] = instruction->src[0].integer[2];
2014                                 integer[3] = instruction->src[0].integer[3];
2015
2016                                 setVertexShaderConstantI(index, integer);
2017                         }
2018                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2019                         {
2020                                 int index = instruction->dst.index;
2021                                 int boolean = instruction->src[0].boolean[0];
2022
2023                                 setVertexShaderConstantB(index, &boolean);
2024                         }
2025                 }
2026         }
2027
2028         void Renderer::loadConstants(const PixelShader *pixelShader)
2029         {
2030                 if(!pixelShader) return;
2031
2032                 size_t count = pixelShader->getLength();
2033
2034                 for(size_t i = 0; i < count; i++)
2035                 {
2036                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2037
2038                         if(instruction->opcode == Shader::OPCODE_DEF)
2039                         {
2040                                 int index = instruction->dst.index;
2041                                 float value[4];
2042
2043                                 value[0] = instruction->src[0].value[0];
2044                                 value[1] = instruction->src[0].value[1];
2045                                 value[2] = instruction->src[0].value[2];
2046                                 value[3] = instruction->src[0].value[3];
2047
2048                                 setPixelShaderConstantF(index, value);
2049                         }
2050                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2051                         {
2052                                 int index = instruction->dst.index;
2053                                 int integer[4];
2054
2055                                 integer[0] = instruction->src[0].integer[0];
2056                                 integer[1] = instruction->src[0].integer[1];
2057                                 integer[2] = instruction->src[0].integer[2];
2058                                 integer[3] = instruction->src[0].integer[3];
2059
2060                                 setPixelShaderConstantI(index, integer);
2061                         }
2062                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2063                         {
2064                                 int index = instruction->dst.index;
2065                                 int boolean = instruction->src[0].boolean[0];
2066
2067                                 setPixelShaderConstantB(index, &boolean);
2068                         }
2069                 }
2070         }
2071
2072         void Renderer::setIndexBuffer(Resource *indexBuffer)
2073         {
2074                 context->indexBuffer = indexBuffer;
2075         }
2076
2077         void Renderer::setMultiSampleMask(unsigned int mask)
2078         {
2079                 context->sampleMask = mask;
2080         }
2081
2082         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2083         {
2084                 sw::transparencyAntialiasing = transparencyAntialiasing;
2085         }
2086
2087         bool Renderer::isReadWriteTexture(int sampler)
2088         {
2089                 for(int index = 0; index < RENDERTARGETS; index++)
2090                 {
2091                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2092                         {
2093                                 return true;
2094                         }
2095                 }
2096         
2097                 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2098                 {
2099                         return true;
2100                 }
2101
2102                 return false;
2103         }
2104         
2105         void Renderer::updateClipper()
2106         {
2107                 if(updateClipPlanes)
2108                 {
2109                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2110                         {
2111                                 const Matrix &scissorWorld = getViewTransform();
2112
2113                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2114                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2115                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2116                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2117                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2118                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2119                         }
2120                         else   // User plane in clip space
2121                         {
2122                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2123                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2124                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2125                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2126                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2127                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2128                         }
2129
2130                         updateClipPlanes = false;
2131                 }
2132         }
2133
2134         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2135         {
2136                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2137
2138                 context->texture[sampler] = resource;
2139         }
2140
2141         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2142         {
2143                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2144                 
2145                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2146         }
2147
2148         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2149         {
2150                 if(type == SAMPLER_PIXEL)
2151                 {
2152                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2153                 }
2154                 else
2155                 {
2156                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2157                 }
2158         }
2159
2160         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2161         {
2162                 if(type == SAMPLER_PIXEL)
2163                 {
2164                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2165                 }
2166                 else
2167                 {
2168                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2169                 }
2170         }
2171
2172         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2173         {
2174                 if(type == SAMPLER_PIXEL)
2175                 {
2176                         PixelProcessor::setGatherEnable(sampler, enable);
2177                 }
2178                 else
2179                 {
2180                         VertexProcessor::setGatherEnable(sampler, enable);
2181                 }
2182         }
2183
2184         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2185         {
2186                 if(type == SAMPLER_PIXEL)
2187                 {
2188                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2189                 }
2190                 else
2191                 {
2192                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2193                 }
2194         }
2195
2196         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2197         {
2198                 if(type == SAMPLER_PIXEL)
2199                 {
2200                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2201                 }
2202                 else
2203                 {
2204                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2205                 }
2206         }
2207
2208         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2209         {
2210                 if(type == SAMPLER_PIXEL)
2211                 {
2212                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2213                 }
2214                 else
2215                 {
2216                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2217                 }
2218         }
2219
2220         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2221         {
2222                 if(type == SAMPLER_PIXEL)
2223                 {
2224                         PixelProcessor::setReadSRGB(sampler, sRGB);
2225                 }
2226                 else
2227                 {
2228                         VertexProcessor::setReadSRGB(sampler, sRGB);
2229                 }
2230         }
2231
2232         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2233         {
2234                 if(type == SAMPLER_PIXEL)
2235                 {
2236                         PixelProcessor::setMipmapLOD(sampler, bias);
2237                 }
2238                 else
2239                 {
2240                         VertexProcessor::setMipmapLOD(sampler, bias);
2241                 }
2242         }
2243
2244         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2245         {
2246                 if(type == SAMPLER_PIXEL)
2247                 {
2248                         PixelProcessor::setBorderColor(sampler, borderColor);
2249                 }
2250                 else
2251                 {
2252                         VertexProcessor::setBorderColor(sampler, borderColor);
2253                 }
2254         }
2255
2256         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2257         {
2258                 if(type == SAMPLER_PIXEL)
2259                 {
2260                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2261                 }
2262                 else
2263                 {
2264                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2265                 }
2266         }
2267
2268         void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2269         {
2270                 if(type == SAMPLER_PIXEL)
2271                 {
2272                         PixelProcessor::setSwizzleR(sampler, swizzleR);
2273                 }
2274                 else
2275                 {
2276                         VertexProcessor::setSwizzleR(sampler, swizzleR);
2277                 }
2278         }
2279
2280         void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2281         {
2282                 if(type == SAMPLER_PIXEL)
2283                 {
2284                         PixelProcessor::setSwizzleG(sampler, swizzleG);
2285                 }
2286                 else
2287                 {
2288                         VertexProcessor::setSwizzleG(sampler, swizzleG);
2289                 }
2290         }
2291
2292         void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2293         {
2294                 if(type == SAMPLER_PIXEL)
2295                 {
2296                         PixelProcessor::setSwizzleB(sampler, swizzleB);
2297                 }
2298                 else
2299                 {
2300                         VertexProcessor::setSwizzleB(sampler, swizzleB);
2301                 }
2302         }
2303
2304         void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2305         {
2306                 if(type == SAMPLER_PIXEL)
2307                 {
2308                         PixelProcessor::setSwizzleA(sampler, swizzleA);
2309                 }
2310                 else
2311                 {
2312                         VertexProcessor::setSwizzleA(sampler, swizzleA);
2313                 }
2314         }
2315
2316         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2317         {
2318                 context->setPointSpriteEnable(pointSpriteEnable);
2319         }
2320
2321         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2322         {
2323                 context->setPointScaleEnable(pointScaleEnable);
2324         }
2325
2326         void Renderer::setLineWidth(float width)
2327         {
2328                 context->lineWidth = width;
2329         }
2330
2331         void Renderer::setDepthBias(float bias)
2332         {
2333                 depthBias = bias;
2334         }
2335
2336         void Renderer::setSlopeDepthBias(float slopeBias)
2337         {
2338                 slopeDepthBias = slopeBias;
2339         }
2340
2341         void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2342         {
2343                 context->rasterizerDiscard = rasterizerDiscard;
2344         }
2345
2346         void Renderer::setPixelShader(const PixelShader *shader)
2347         {
2348                 context->pixelShader = shader;
2349
2350                 loadConstants(shader);
2351         }
2352
2353         void Renderer::setVertexShader(const VertexShader *shader)
2354         {
2355                 context->vertexShader = shader;
2356
2357                 loadConstants(shader);
2358         }
2359
2360         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2361         {
2362                 for(int i = 0; i < DRAW_COUNT; i++)
2363                 {
2364                         if(drawCall[i]->psDirtyConstF < index + count)
2365                         {
2366                                 drawCall[i]->psDirtyConstF = index + count;
2367                         }
2368                 }
2369
2370                 for(int i = 0; i < count; i++)
2371                 {
2372                         PixelProcessor::setFloatConstant(index + i, value);
2373                         value += 4;
2374                 }
2375         }
2376
2377         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2378         {
2379                 for(int i = 0; i < DRAW_COUNT; i++)
2380                 {
2381                         if(drawCall[i]->psDirtyConstI < index + count)
2382                         {
2383                                 drawCall[i]->psDirtyConstI = index + count;
2384                         }
2385                 }
2386
2387                 for(int i = 0; i < count; i++)
2388                 {
2389                         PixelProcessor::setIntegerConstant(index + i, value);
2390                         value += 4;
2391                 }
2392         }
2393
2394         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2395         {
2396                 for(int i = 0; i < DRAW_COUNT; i++)
2397                 {
2398                         if(drawCall[i]->psDirtyConstB < index + count)
2399                         {
2400                                 drawCall[i]->psDirtyConstB = index + count;
2401                         }
2402                 }
2403
2404                 for(int i = 0; i < count; i++)
2405                 {
2406                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2407                         boolean++;
2408                 }
2409         }
2410
2411         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2412         {
2413                 for(int i = 0; i < DRAW_COUNT; i++)
2414                 {
2415                         if(drawCall[i]->vsDirtyConstF < index + count)
2416                         {
2417                                 drawCall[i]->vsDirtyConstF = index + count;
2418                         }
2419                 }
2420
2421                 for(int i = 0; i < count; i++)
2422                 {
2423                         VertexProcessor::setFloatConstant(index + i, value);
2424                         value += 4;
2425                 }
2426         }
2427
2428         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2429         {
2430                 for(int i = 0; i < DRAW_COUNT; i++)
2431                 {
2432                         if(drawCall[i]->vsDirtyConstI < index + count)
2433                         {
2434                                 drawCall[i]->vsDirtyConstI = index + count;
2435                         }
2436                 }
2437
2438                 for(int i = 0; i < count; i++)
2439                 {
2440                         VertexProcessor::setIntegerConstant(index + i, value);
2441                         value += 4;
2442                 }
2443         }
2444
2445         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2446         {
2447                 for(int i = 0; i < DRAW_COUNT; i++)
2448                 {
2449                         if(drawCall[i]->vsDirtyConstB < index + count)
2450                         {
2451                                 drawCall[i]->vsDirtyConstB = index + count;
2452                         }
2453                 }
2454
2455                 for(int i = 0; i < count; i++)
2456                 {
2457                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2458                         boolean++;
2459                 }
2460         }
2461
2462         void Renderer::setModelMatrix(const Matrix &M, int i)
2463         {
2464                 VertexProcessor::setModelMatrix(M, i);
2465         }
2466
2467         void Renderer::setViewMatrix(const Matrix &V)
2468         {
2469                 VertexProcessor::setViewMatrix(V);
2470                 updateClipPlanes = true;
2471         }
2472
2473         void Renderer::setBaseMatrix(const Matrix &B)
2474         {
2475                 VertexProcessor::setBaseMatrix(B);
2476                 updateClipPlanes = true;
2477         }
2478
2479         void Renderer::setProjectionMatrix(const Matrix &P)
2480         {
2481                 VertexProcessor::setProjectionMatrix(P);
2482                 updateClipPlanes = true;
2483         }
2484
2485         void Renderer::addQuery(Query *query)
2486         {
2487                 queries.push_back(query);
2488         }
2489         
2490         void Renderer::removeQuery(Query *query)
2491         {
2492                 queries.remove(query);
2493         }
2494
2495         #if PERF_HUD
2496                 int Renderer::getThreadCount()
2497                 {
2498                         return threadCount;
2499                 }
2500                 
2501                 int64_t Renderer::getVertexTime(int thread)
2502                 {
2503                         return vertexTime[thread];
2504                 }
2505
2506                 int64_t Renderer::getSetupTime(int thread)
2507                 {
2508                         return setupTime[thread];
2509                 }
2510                         
2511                 int64_t Renderer::getPixelTime(int thread)
2512                 {
2513                         return pixelTime[thread];
2514                 }
2515
2516                 void Renderer::resetTimers()
2517                 {
2518                         for(int thread = 0; thread < threadCount; thread++)
2519                         {
2520                                 vertexTime[thread] = 0;
2521                                 setupTime[thread] = 0;
2522                                 pixelTime[thread] = 0;
2523                         }
2524                 }
2525         #endif
2526
2527         void Renderer::setViewport(const Viewport &viewport)
2528         {
2529                 this->viewport = viewport;
2530         }
2531
2532         void Renderer::setScissor(const Rect &scissor)
2533         {
2534                 this->scissor = scissor;
2535         }
2536
2537         void Renderer::setClipFlags(int flags)
2538         {
2539                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2540         }
2541
2542         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2543         {
2544                 if(index < MAX_CLIP_PLANES)
2545                 {
2546                         userPlane[index] = plane;
2547                 }
2548                 else ASSERT(false);
2549
2550                 updateClipPlanes = true;
2551         }
2552
2553         void Renderer::updateConfiguration(bool initialUpdate)
2554         {
2555                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2556
2557                 if(newConfiguration || initialUpdate)
2558                 {
2559                         terminateThreads();
2560
2561                         SwiftConfig::Configuration configuration = {};
2562                         swiftConfig->getConfiguration(configuration);
2563
2564                         precacheVertex = !newConfiguration && configuration.precache;
2565                         precacheSetup = !newConfiguration && configuration.precache;
2566                         precachePixel = !newConfiguration && configuration.precache;
2567
2568                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2569                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2570                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2571
2572                         switch(configuration.textureSampleQuality)
2573                         {
2574                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2575                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2576                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2577                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2578                         }
2579
2580                         switch(configuration.mipmapQuality)
2581                         {
2582                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2583                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2584                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2585                         }
2586
2587                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2588
2589                         switch(configuration.transcendentalPrecision)
2590                         {
2591                         case 0:
2592                                 logPrecision = APPROXIMATE;
2593                                 expPrecision = APPROXIMATE;
2594                                 rcpPrecision = APPROXIMATE;
2595                                 rsqPrecision = APPROXIMATE;
2596                                 break;
2597                         case 1:
2598                                 logPrecision = PARTIAL;
2599                                 expPrecision = PARTIAL;
2600                                 rcpPrecision = PARTIAL;
2601                                 rsqPrecision = PARTIAL;
2602                                 break;
2603                         case 2:
2604                                 logPrecision = ACCURATE;
2605                                 expPrecision = ACCURATE;
2606                                 rcpPrecision = ACCURATE;
2607                                 rsqPrecision = ACCURATE;
2608                                 break;
2609                         case 3:
2610                                 logPrecision = WHQL;
2611                                 expPrecision = WHQL;
2612                                 rcpPrecision = WHQL;
2613                                 rsqPrecision = WHQL;
2614                                 break;
2615                         case 4:
2616                                 logPrecision = IEEE;
2617                                 expPrecision = IEEE;
2618                                 rcpPrecision = IEEE;
2619                                 rsqPrecision = IEEE;
2620                                 break;
2621                         default:
2622                                 logPrecision = ACCURATE;
2623                                 expPrecision = ACCURATE;
2624                                 rcpPrecision = ACCURATE;
2625                                 rsqPrecision = ACCURATE;
2626                                 break;
2627                         }
2628
2629                         switch(configuration.transparencyAntialiasing)
2630                         {
2631                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2632                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2633                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2634                         }
2635
2636                         switch(configuration.threadCount)
2637                         {
2638                         case -1: threadCount = CPUID::coreCount();        break;
2639                         case 0:  threadCount = CPUID::processAffinity();  break;
2640                         default: threadCount = configuration.threadCount; break;
2641                         }
2642
2643                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2644                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2645                         CPUID::setEnableSSE3(configuration.enableSSE3);
2646                         CPUID::setEnableSSE2(configuration.enableSSE2);
2647                         CPUID::setEnableSSE(configuration.enableSSE);
2648
2649                         for(int pass = 0; pass < 10; pass++)
2650                         {
2651                                 optimization[pass] = configuration.optimization[pass];
2652                         }
2653
2654                         forceWindowed = configuration.forceWindowed;
2655                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2656                         postBlendSRGB = configuration.postBlendSRGB;
2657                         exactColorRounding = configuration.exactColorRounding;
2658                         forceClearRegisters = configuration.forceClearRegisters;
2659
2660                 #ifndef NDEBUG
2661                         minPrimitives = configuration.minPrimitives;
2662                         maxPrimitives = configuration.maxPrimitives;
2663                 #endif
2664                 }
2665
2666                 if(!initialUpdate && !worker[0])
2667                 {
2668                         initializeThreads();
2669                 }
2670         }
2671 }