OSDN Git Service

Fix clearing all samples of multisample render targets.
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Renderer.hpp"
16
17 #include "Clipper.hpp"
18 #include "Math.hpp"
19 #include "FrameBuffer.hpp"
20 #include "Timer.hpp"
21 #include "Surface.hpp"
22 #include "Half.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
27 #include "CPUID.hpp"
28 #include "Memory.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
31 #include "Debug.hpp"
32 #include "Reactor/Reactor.hpp"
33
34 #undef max
35
36 bool disableServer = true;   // Passed to the SwiftConfig constructor in Renderer::Renderer()
37
   // Debug builds only: Renderer::draw() returns without drawing when its count argument
   // is below minPrimitives or above maxPrimitives.
38 #ifndef NDEBUG
39 unsigned int minPrimitives = 1;
40 unsigned int maxPrimitives = 1 << 21;
41 #endif
42
43 namespace sw
44 {
45         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47         extern bool booleanFaceRegister;
48         extern bool fullPixelPositionRegister;
49         extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
50         extern bool secondaryColor;             // Specular lighting is applied after texturing
51         extern bool colorsDefaultToZero;
           // The seven flags above are overwritten from the Conventions argument whenever a Renderer is constructed
52
53         extern bool forceWindowed;
54         extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near
55         extern bool postBlendSRGB;
56         extern bool exactColorRounding;         // Overwritten from the Renderer constructor argument of the same name
57         extern TransparencyAntialiasing transparencyAntialiasing;
58         extern bool forceClearRegisters;
59
60         extern bool precacheVertex;
61         extern bool precacheSetup;
62         extern bool precachePixel;
63
64         int batchSize = 128;     // Renderer::draw() divides this by the multisample count to get the per-draw batch size
65         int threadCount = 1;     // In debug builds, a value of 1 makes Renderer::draw() run the task loop on the calling thread
66         int unitCount = 1;
67         int clusterCount = 1;    // Number of per-cluster occlusion counters Renderer::draw() resets
68
69         TranscendentalPrecision logPrecision = ACCURATE;
70         TranscendentalPrecision expPrecision = ACCURATE;
71         TranscendentalPrecision rcpPrecision = ACCURATE;
72         TranscendentalPrecision rsqPrecision = ACCURATE;
73         bool perspectiveCorrection = true;
74
75         struct Parameters
           // Pairs a Renderer with a thread index. No use of this struct is visible in this part
           // of the file; presumably it is the argument handed to a worker thread - TODO confirm.
76         {
77                 Renderer *renderer;
78                 int threadIndex;
79         };
80
81         DrawCall::DrawCall()
82         {
83                 queries = 0;
84
85                 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
86                 vsDirtyConstI = 16;
87                 vsDirtyConstB = 16;
88
89                 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
90                 psDirtyConstI = 16;
91                 psDirtyConstB = 16;
92
93                 references = -1;
94
95                 data = (DrawData*)allocate(sizeof(DrawData));
96                 data->constants = &constants;
97         }
98
99         DrawCall::~DrawCall()
100         {
101                 delete queries;
102
103                 deallocate(data);
104         }
105
106         Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
107         {
108                 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
109                 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
110                 sw::booleanFaceRegister = conventions.booleanFaceRegister;
111                 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
112                 sw::leadingVertexFirst = conventions.leadingVertexFirst;
113                 sw::secondaryColor = conventions.secondaryColor;
114                 sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
115                 sw::exactColorRounding = exactColorRounding;
116
117                 setRenderTarget(0, 0);
118                 clipper = new Clipper(symmetricNormalizedDepth);
119                 blitter = new Blitter;
120
121                 updateViewMatrix = true;
122                 updateBaseMatrix = true;
123                 updateProjectionMatrix = true;
124                 updateClipPlanes = true;
125
126                 #if PERF_HUD
127                         resetTimers();
128                 #endif
129
130                 for(int i = 0; i < 16; i++)
131                 {
132                         vertexTask[i] = 0;
133
134                         worker[i] = 0;
135                         resume[i] = 0;
136                         suspend[i] = 0;
137                 }
138
139                 threadsAwake = 0;
140                 resumeApp = new Event();
141
142                 currentDraw = 0;
143                 nextDraw = 0;
144
145                 qHead = 0;
146                 qSize = 0;
147
148                 for(int i = 0; i < 16; i++)
149                 {
150                         triangleBatch[i] = 0;
151                         primitiveBatch[i] = 0;
152                 }
153
154                 for(int draw = 0; draw < DRAW_COUNT; draw++)
155                 {
156                         drawCall[draw] = new DrawCall();
157                         drawList[draw] = drawCall[draw];
158                 }
159
160                 for(int unit = 0; unit < 16; unit++)
161                 {
162                         primitiveProgress[unit].init();
163                 }
164
165                 for(int cluster = 0; cluster < 16; cluster++)
166                 {
167                         pixelProgress[cluster].init();
168                 }
169
170                 clipFlags = 0;
171
172                 swiftConfig = new SwiftConfig(disableServer);
173                 updateConfiguration(true);
174
175                 sync = new Resource(0);
176         }
177
178         Renderer::~Renderer()
179         {
                    // Releases everything the constructor created: the sync resource, clipper,
                    // blitter, application-resume event, draw call pool and configuration object.

                    // sync was created with new Resource(0); it is released through destruct() rather than delete
180                 sync->destruct();
181
182                 delete clipper;
183                 clipper = nullptr;
184
185                 delete blitter;
186                 blitter = nullptr;
187
                    // NOTE(review): clipper and blitter are already deleted when terminateThreads() runs;
                    // confirm no worker thread can still touch them at this point.
188                 terminateThreads();
189                 delete resumeApp;
190
191                 for(int draw = 0; draw < DRAW_COUNT; draw++)
192                 {
193                         delete drawCall[draw];
194                 }
195
196                 delete swiftConfig;
197         }
198
199         // This object has to be mem aligned
200         void* Renderer::operator new(size_t size)
201         {
202                 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
203                 return sw::allocate(sizeof(Renderer), 16);
204         }
205
206         void Renderer::operator delete(void * mem)
207         {
208                 sw::deallocate(mem);
209         }
210
211         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
212         {
                    // Queues one draw call per supersample pass. Each pass optionally refreshes the
                    // processor states and routines, claims a free DrawCall slot, copies the state the
                    // routines need into that slot's DrawData, locks the resources it references, and
                    // finally publishes the draw and makes sure a task loop is running.
                    //
                    // drawType    - stored in the context and in the draw call
                    // indexOffset - byte offset added to the locked index buffer pointer
                    // count       - stored as draw->count and split into batches
                    // update      - when true, states and routines are refreshed even if the sample mask is unchanged
213                 #ifndef NDEBUG
214                         if(count < minPrimitives || count > maxPrimitives)
215                         {
216                                 return;
217                         }
218                 #endif
219
220                 context->drawType = drawType;
221
222                 updateConfiguration();
223                 updateClipper();
224
225                 int ss = context->getSuperSampleCount();
226                 int ms = context->getMultiSampleCount();
227
                    // One pass per supersample
228                 for(int q = 0; q < ss; q++)
229                 {
230                         unsigned int oldMultiSampleMask = context->multiSampleMask;
                            // Select the ms bits of the sample mask that belong to pass q
231                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
232
                            // No sample enabled for this pass: nothing to draw
233                         if(!context->multiSampleMask)
234                         {
235                                 continue;
236                         }
237
                            // NOTE(review): no matching sync->unlock() is visible in this function (including the
                            // early return in the fill mode switch below); presumably it is released when the
                            // draw completes - confirm.
238                         sync->lock(sw::PRIVATE);
239
240                         if(update || oldMultiSampleMask != context->multiSampleMask)
241                         {
242                                 vertexState = VertexProcessor::update(drawType);
243                                 setupState = SetupProcessor::update();
244                                 pixelState = PixelProcessor::update();
245
246                                 vertexRoutine = VertexProcessor::routine(vertexState);
247                                 setupRoutine = SetupProcessor::routine(setupState);
248                                 pixelRoutine = PixelProcessor::routine(pixelState);
249                         }
250
                            // Primitives per batch; wireframe and vertex fill modes override this with 1
251                         int batch = batchSize / ms;
252
253                         int (Renderer::*setupPrimitives)(int batch, int count);
254
255                         if(context->isDrawTriangle())
256                         {
257                                 switch(context->fillMode)
258                                 {
259                                 case FILL_SOLID:
260                                         setupPrimitives = &Renderer::setupSolidTriangles;
261                                         break;
262                                 case FILL_WIREFRAME:
263                                         setupPrimitives = &Renderer::setupWireframeTriangle;
264                                         batch = 1;
265                                         break;
266                                 case FILL_VERTEX:
267                                         setupPrimitives = &Renderer::setupVertexTriangle;
268                                         batch = 1;
269                                         break;
270                                 default:
271                                         ASSERT(false);
272                                         return;
273                                 }
274                         }
275                         else if(context->isDrawLine())
276                         {
277                                 setupPrimitives = &Renderer::setupLines;
278                         }
279                         else   // Point draw
280                         {
281                                 setupPrimitives = &Renderer::setupPoints;
282                         }
283
284                         DrawCall *draw = 0;
285
                            // Claim a free draw call slot (references == -1); when none is free, wait on
                            // resumeApp and then scan again
286                         do
287                         {
288                                 for(int i = 0; i < DRAW_COUNT; i++)
289                                 {
290                                         if(drawCall[i]->references == -1)
291                                         {
292                                                 draw = drawCall[i];
293                                                 drawList[nextDraw % DRAW_COUNT] = draw;
294
295                                                 break;
296                                         }
297                                 }
298
299                                 if(!draw)
300                                 {
301                                         resumeApp->wait();
302                                 }
303                         }
304                         while(!draw);
305
306                         DrawData *data = draw->data;
307
                            // Attach the active queries and take a reference on each. Queries of type
                            // TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN are only attached when vertexState has
                            // both transform feedback and its query enabled.
308                         if(queries.size() != 0)
309                         {
310                                 draw->queries = new std::list<Query*>();
311                                 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
312                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
313                                 {
314                                         Query* q = *query;
315                                         if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
316                                         {
317                                                 atomicIncrement(&(q->reference));
318                                                 draw->queries->push_back(q);
319                                         }
320                                 }
321                         }
322
323                         draw->drawType = drawType;
324                         draw->batchSize = batch;
325
326                         vertexRoutine->bind();
327                         setupRoutine->bind();
328                         pixelRoutine->bind();
329
330                         draw->vertexRoutine = vertexRoutine;
331                         draw->setupRoutine = setupRoutine;
332                         draw->pixelRoutine = pixelRoutine;
333                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
334                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
335                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
336                         draw->setupPrimitives = setupPrimitives;
337                         draw->setupState = setupState;
338
                            // Record the vertex streams and lock the resources behind them
339                         for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
340                         {
341                                 draw->vertexStream[i] = context->input[i].resource;
342                                 data->input[i] = context->input[i].buffer;
343                                 data->stride[i] = context->input[i].stride;
344
345                                 if(draw->vertexStream[i])
346                                 {
347                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
348                                 }
349                         }
350
351                         if(context->indexBuffer)
352                         {
353                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
354                         }
355
356                         draw->indexBuffer = context->indexBuffer;
357
                            // Start with no textures referenced; the pixel and vertex sampler loops below fill in the used ones
358                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
359                         {
360                                 draw->texture[sampler] = 0;
361                         }
362
363                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
364                         {
365                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
366                                 {
367                                         draw->texture[sampler] = context->texture[sampler];
368                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texture is both read and written, use the same read/write lock as render targets
369
370                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
371                                 }
372                         }
373
                            // Pixel shader constants: copy as many as the slot's dirty counts say, then clear the counts
374                         if(context->pixelShader)
375                         {
376                                 if(draw->psDirtyConstF)
377                                 {
378                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
379                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
380                                         draw->psDirtyConstF = 0;
381                                 }
382
383                                 if(draw->psDirtyConstI)
384                                 {
385                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
386                                         draw->psDirtyConstI = 0;
387                                 }
388
389                                 if(draw->psDirtyConstB)
390                                 {
391                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
392                                         draw->psDirtyConstB = 0;
393                                 }
394
395                                 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
396                         }
397                         else
398                         {
399                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
400                                 {
401                                         draw->pUniformBuffers[i] = nullptr;
402                                 }
403                         }
404
                            // Texture stage uniforms are copied up to the first disabled stage (all 8 when a pixel shader is set)
405                         if(context->pixelShaderVersion() <= 0x0104)
406                         {
407                                 for(int stage = 0; stage < 8; stage++)
408                                 {
409                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
410                                         {
411                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
412                                         }
413                                         else break;
414                                 }
415                         }
416
417                         if(context->vertexShader)
418                         {
                                    // Vertex textures occupy the texture slots after the TEXTURE_IMAGE_UNITS pixel ones
419                                 if(context->vertexShader->getVersion() >= 0x0300)
420                                 {
421                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
422                                         {
423                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
424                                                 {
425                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
426                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
427
428                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
429                                                 }
430                                         }
431                                 }
432
433                                 if(draw->vsDirtyConstF)
434                                 {
435                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
436                                         draw->vsDirtyConstF = 0;
437                                 }
438
439                                 if(draw->vsDirtyConstI)
440                                 {
441                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
442                                         draw->vsDirtyConstI = 0;
443                                 }
444
445                                 if(draw->vsDirtyConstB)
446                                 {
447                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
448                                         draw->vsDirtyConstB = 0;
449                                 }
450
451                                 if(context->vertexShader->isInstanceIdDeclared())
452                                 {
453                                         data->instanceID = context->instanceID;
454                                 }
455
456                                 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
457                                 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
458                         }
459                         else
460                         {
                                    // No vertex shader: copy the ff state and mark this slot's vertex shader
                                    // constants dirty again, as the DrawCall constructor does
461                                 data->ff = ff;
462
463                                 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
464                                 draw->vsDirtyConstI = 16;
465                                 draw->vsDirtyConstB = 16;
466
467                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
468                                 {
469                                         draw->vUniformBuffers[i] = nullptr;
470                                 }
471
472                                 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
473                                 {
474                                         draw->transformFeedbackBuffers[i] = nullptr;
475                                 }
476                         }
477
478                         if(pixelState.stencilActive)
479                         {
480                                 data->stencil[0] = stencil;
481                                 data->stencil[1] = stencilCCW;
482                         }
483
484                         if(pixelState.fogActive)
485                         {
486                                 data->fog = fog;
487                         }
488
489                         if(setupState.isDrawPoint)
490                         {
491                                 data->point = point;
492                         }
493
494                         data->lineWidth = context->lineWidth;
495
496                         data->factor = factor;
497
                            // Alpha-to-coverage: per-sample thresholds spread symmetrically around the alpha
                            // reference; only 2 and 4 samples are handled
498                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
499                         {
500                                 float ref = context->alphaReference * (1.0f / 255.0f);
501                                 float margin = sw::min(ref, 1.0f - ref);
502
503                                 if(ms == 4)
504                                 {
505                                         data->a2c0 = replicate(ref - margin * 0.6f);
506                                         data->a2c1 = replicate(ref - margin * 0.2f);
507                                         data->a2c2 = replicate(ref + margin * 0.2f);
508                                         data->a2c3 = replicate(ref + margin * 0.6f);
509                                 }
510                                 else if(ms == 2)
511                                 {
512                                         data->a2c0 = replicate(ref - margin * 0.3f);
513                                         data->a2c1 = replicate(ref + margin * 0.3f);
514                                 }
515                                 else ASSERT(false);
516                         }
517
518                         if(pixelState.occlusionEnabled)
519                         {
520                                 for(int cluster = 0; cluster < clusterCount; cluster++)
521                                 {
522                                         data->occlusion[cluster] = 0;
523                                 }
524                         }
525
526                         #if PERF_PROFILE
527                                 for(int cluster = 0; cluster < clusterCount; cluster++)
528                                 {
529                                         for(int i = 0; i < PERF_TIMERS; i++)
530                                         {
531                                                 data->cycles[i][cluster] = 0;
532                                         }
533                                 }
534                         #endif
535
536                         // Viewport
537                         {
538                                 float W = 0.5f * viewport.width;
539                                 float H = 0.5f * viewport.height;
540                                 float X0 = viewport.x0 + W;
541                                 float Y0 = viewport.y0 + H;
542                                 float N = viewport.minZ;
543                                 float F = viewport.maxZ;
544                                 float Z = F - N;
545
546                                 if(context->isDrawTriangle(false))
547                                 {
548                                         N += depthBias;
549                                 }
550
551                                 if(complementaryDepthBuffer)
552                                 {
553                                         Z = -Z;
554                                         N = 1 - N;
555                                 }
556
                                    // Offset tables: row = log2(supersample count), column = pass index q
557                                 static const float X[5][16] =   // Fragment offsets
558                                 {
559                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
560                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
561                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
562                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
563                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
564                                 };
565
566                                 static const float Y[5][16] =   // Fragment offsets
567                                 {
568                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
569                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
570                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
571                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
572                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
573                                 };
574
575                                 int s = sw::log2(ss);
576
577                                 data->Wx16 = replicate(W * 16);
578                                 data->Hx16 = replicate(H * 16);
579                                 data->X0x16 = replicate(X0 * 16 - 8);
580                                 data->Y0x16 = replicate(Y0 * 16 - 8);
581                                 data->XXXX = replicate(X[s][q] / W);
582                                 data->YYYY = replicate(Y[s][q] / H);
583                                 data->halfPixelX = replicate(0.5f / W);
584                                 data->halfPixelY = replicate(0.5f / H);
585                                 data->viewportHeight = abs(viewport.height);
586                                 data->slopeDepthBias = slopeDepthBias;
587                                 data->depthRange = Z;
588                                 data->depthNear = N;
589                                 draw->clipFlags = clipFlags;
590
                                    // Only the clip planes whose flag is set are copied
591                                 if(clipFlags)
592                                 {
593                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
594                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
595                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
596                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
597                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
598                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
599                                 }
600                         }
601
602                         // Target
603                         {
                                    // Color, depth and stencil buffers are all locked at position q * ms for this pass
604                                 for(int index = 0; index < RENDERTARGETS; index++)
605                                 {
606                                         draw->renderTarget[index] = context->renderTarget[index];
607
608                                         if(draw->renderTarget[index])
609                                         {
610                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
611                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
612                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
613                                         }
614                                 }
615
616                                 draw->depthBuffer = context->depthBuffer;
617                                 draw->stencilBuffer = context->stencilBuffer;
618
619                                 if(draw->depthBuffer)
620                                 {
621                                         data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
622                                         data->depthPitchB = context->depthBuffer->getInternalPitchB();
623                                         data->depthSliceB = context->depthBuffer->getInternalSliceB();
624                                 }
625
626                                 if(draw->stencilBuffer)
627                                 {
628                                         data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, q * ms, MANAGED);
629                                         data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
630                                         data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
631                                 }
632                         }
633
634                         // Scissor
635                         {
636                                 data->scissorX0 = scissor.x0;
637                                 data->scissorX1 = scissor.x1;
638                                 data->scissorY0 = scissor.y0;
639                                 data->scissorY1 = scissor.y1;
640                         }
641
642                         draw->primitive = 0;
643                         draw->count = count;
644
                            // Number of batches, rounded up; from here on the slot no longer reads as free (-1)
645                         draw->references = (count + batch - 1) / batch;
646
                            // Publish the draw: drawList[nextDraw % DRAW_COUNT] was filled in above
647                         schedulerMutex.lock();
648                         nextDraw++;
649                         schedulerMutex.unlock();
650
651                         #ifndef NDEBUG
652                         if(threadCount == 1)   // Use main thread for draw execution
653                         {
654                                 threadsAwake = 1;
655                                 task[0].type = Task::RESUME;
656
657                                 taskLoop(0);
658                         }
659                         else
660                         #endif
661                         {
                                    // If no thread is awake, wait for thread 0 to signal suspend, then resume it
662                                 if(!threadsAwake)
663                                 {
664                                         suspend[0]->wait();
665
666                                         threadsAwake = 1;
667                                         task[0].type = Task::RESUME;
668
669                                         resume[0]->signal();
670                                 }
671                         }
672                 }
673         }
674
675         void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
676         {
                    // Clears 'clearRect' of 'dest' by issuing one blitter->clear() call per slice.
                    // 'value', 'format' and 'rgbaMask' are handed to the blitter unchanged.
677                 SliceRect rect = clearRect;
                    // Slice count of the destination. The local name suggests each slice holds one
                    // sample of a multisample surface - TODO confirm against Surface::getDepth().
678                 int samples = dest->getDepth();
679
                    // rect.slice doubles as the loop counter, so every call targets a different slice.
680                 for(rect.slice = 0; rect.slice < samples; rect.slice++)
681                 {
682                         blitter->clear(value, format, dest, rect, rgbaMask);
683                 }
684         }
685
686         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
687         {
                    // Thin forwarder: passes every argument, unchanged and in the same order, to blitter->blit().
688                 blitter->blit(source, sRect, dest, dRect, filter, isStencil);
689         }
690
691         void Renderer::blit3D(Surface *source, Surface *dest)
692         {
                    // Thin forwarder: passes both surfaces, unchanged, to blitter->blit3D().
693                 blitter->blit3D(source, dest);
694         }
695
696         void Renderer::threadFunction(void *parameters)
697         {
                    // Runs the task loop of one renderer thread.
                    // 'parameters' is an opaque pointer that is cast to Parameters; it supplies the
                    // Renderer instance and the index of the thread to run.
                    // NOTE(review): presumably used as the start routine of the worker threads - confirm at the creation site.
698                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
699                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
700
                    // Below IEEE precision, turn on flush-to-zero and denormals-are-zero before any work is done.
                    // NOTE(review): presumably these are per-thread CPU modes, hence set here - confirm in CPUID.
701                 if(logPrecision < IEEE)
702                 {
703                         CPUID::setFlushToZero(true);
704                         CPUID::setDenormalsAreZero(true);
705                 }
706
                    // Returns only once threadLoop() observes exitThreads.
707                 renderer->threadLoop(threadIndex);
708         }
709
710         void Renderer::threadLoop(int threadIndex)
711         {
                    // Outer loop of a renderer thread: run tasks until told to suspend, then park
                    // until resumed; repeat until exitThreads is set.
712                 while(!exitThreads)
713                 {
                            // Returns once task[threadIndex].type is Task::SUSPEND.
714                         taskLoop(threadIndex);
715
                            // Handshake with whoever wakes this thread: announce that the task loop has been
                            // left, then block until resumed. The waker (see scheduleTask()) first waits on
                            // suspend[threadIndex], sets the task type to RESUME and only then signals resume[threadIndex].
716                         suspend[threadIndex]->signal();
717                         resume[threadIndex]->wait();
718                 }
719         }
720
721         void Renderer::taskLoop(int threadIndex)
722         {
                    // Repeatedly fetches a task for this thread and executes it.
                    // The type is tested before scheduling, so when scheduleTask() assigns Task::SUSPEND
                    // that task is still passed to executeTask() once (a no-op there) before the loop ends.
723                 while(task[threadIndex].type != Task::SUSPEND)
724                 {
725                         scheduleTask(threadIndex);
726                         executeTask(threadIndex);
727                 }
728         }
729
730         void Renderer::findAvailableTasks()
731         {
                    // Appends newly runnable work to taskQueue: pixel tasks first, then primitive tasks.
                    // The only caller in view, scheduleTask(), invokes this while holding schedulerMutex.
                    //
                    // taskQueue is written as a ring of 32 entries; qHead is the next write slot and
                    // qSize the number of queued tasks.
                    // NOTE(review): nothing in this function checks qSize against 32 - presumably the
                    // cluster/unit counts keep it in range; confirm.

732                 // Find pixel tasks
733                 for(int cluster = 0; cluster < clusterCount; cluster++)
734                 {
                            // Only clusters without a pixel task in flight are considered;
                            // 'executing' is cleared again at the end of finishRendering().
735                         if(!pixelProgress[cluster].executing)
736                         {
737                                 for(int unit = 0; unit < unitCount; unit++)
738                                 {
                                            // references > 0 is set by executeTask() (to clusterCount) once the batch's
                                            // vertex processing and setup are done.
739                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
740                                         {
                                                    // The batch must belong to the draw call this cluster is currently working on...
741                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
742                                                 {
                                                            // ...and must start exactly where the cluster stopped, so each cluster
                                                            // consumes the batches of a draw call in order.
743                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
744                                                         {
745                                                                 Task &task = taskQueue[qHead];
746                                                                 task.type = Task::PIXELS;
747                                                                 task.primitiveUnit = unit;
748                                                                 task.pixelCluster = cluster;
749
750                                                                 pixelProgress[cluster].executing = true;
751
752                                                                 // Commit to the task queue
753                                                                 qHead = (qHead + 1) % 32;
754                                                                 qSize++;
755
                                                                    // At most one pixel task is queued per cluster per call.
756                                                                 break;
757                                                         }
758                                                 }
759                                         }
760                                 }
761                         }
762                 }
763
764                 // Find primitive tasks
                    // currentDraw == nextDraw means every draw call queued so far has been handed out in full
                    // (nextDraw is incremented under schedulerMutex once a draw call has been set up).
765                 if(currentDraw == nextDraw)
766                 {
767                         return;   // No more primitives to process
768                 }
769
770                 for(int unit = 0; unit < unitCount; unit++)
771                 {
                            // drawList is indexed modulo DRAW_COUNT.
772                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
773
                            // All batches of this draw call have been handed out: move on to the next draw call.
                            // This is tested once per loop iteration (an 'if', not a loop).
774                         if(draw->primitive >= draw->count)
775                         {
776                                 currentDraw++;
777
778                                 if(currentDraw == nextDraw)
779                                 {
780                                         return;   // No more primitives to process
781                                 }
782
783                                 draw = drawList[currentDraw % DRAW_COUNT];
784                         }
785
786                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
787                         {
788                                 int primitive = draw->primitive;
789                                 int count = draw->count;
790                                 int batch = draw->batchSize;
791
792                                 primitiveProgress[unit].drawCall = currentDraw;
793                                 primitiveProgress[unit].firstPrimitive = primitive;
                                    // A full batch, or whatever remains for the last batch of the draw call.
794                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
795
                                    // May step past draw->count on the last batch; the '>=' test above copes with that.
796                                 draw->primitive += batch;
797
798                                 Task &task = taskQueue[qHead];
799                                 task.type = Task::PRIMITIVES;
800                                 task.primitiveUnit = unit;
801
                                    // -1 marks the unit as taken: it is non-zero, so the test above skips it, and it is
                                    // not > 0, so the pixel scan ignores it until executeTask() stores clusterCount.
802                                 primitiveProgress[unit].references = -1;
803
804                                 // Commit to the task queue
805                                 qHead = (qHead + 1) % 32;
806                                 qSize++;
807                         }
808                 }
809         }
810
811         void Renderer::scheduleTask(int threadIndex)
812         {
                    // Chooses the next task for thread 'threadIndex' and stores it in task[threadIndex].
                    // Everything between lock() and unlock() runs under schedulerMutex.
813                 schedulerMutex.lock();
814
                    // Look for new work only when the queue holds fewer tasks than
                    // (threads that are not awake) + 1.
815                 if((int)qSize < threadCount - threadsAwake + 1)
816                 {
817                         findAvailableTasks();
818                 }
819
820                 if(qSize != 0)
821                 {
                            // Take the oldest queued entry, which sits qSize slots behind the write position qHead.
                            // NOTE(review): when qHead < qSize this relies on unsigned wrap-around before the % 32;
                            // presumably qHead and qSize are unsigned (note the (int) cast above) - confirm in the header.
822                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
823                         qSize--;
824
825                         if(threadsAwake != threadCount)
826                         {
                                    // Number of suspended threads to wake for the tasks still queued.
827                                 int wakeup = qSize - threadsAwake + 1;
828
829                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
830                                 {
831                                         if(task[i].type == Task::SUSPEND)
832                                         {
                                                    // threadLoop() signals suspend[i] after its taskLoop() has returned, so this
                                                    // wait completes only once thread i has left its task loop. The task type
                                                    // is then changed before the thread is released through resume[i].
833                                                 suspend[i]->wait();
834                                                 task[i].type = Task::RESUME;
835                                                 resume[i]->signal();
836
837                                                 threadsAwake++;
838                                                 wakeup--;
839                                         }
840                                 }
841                         }
842                 }
843                 else
844                 {
                            // Nothing to run: taskLoop() exits when it sees Task::SUSPEND, and the thread
                            // no longer counts as awake.
845                         task[threadIndex].type = Task::SUSPEND;
846
847                         threadsAwake--;
848                 }
849
850                 schedulerMutex.unlock();
851         }
852
853         void Renderer::executeTask(int threadIndex)
854         {
                    // Executes the task currently stored in task[threadIndex].
                    // With PERF_HUD enabled, elapsed ticks are accumulated per thread into
                    // vertexTime, setupTime and pixelTime.
855                 #if PERF_HUD
856                         int64_t startTick = Timer::ticks();
857                 #endif
858
859                 switch(task[threadIndex].type)
860                 {
861                 case Task::PRIMITIVES:
862                         {
                                    // The batch range and draw call were recorded in primitiveProgress[unit]
                                    // by findAvailableTasks() when this task was queued.
863                                 int unit = task[threadIndex].primitiveUnit;
864
865                                 int input = primitiveProgress[unit].firstPrimitive;
866                                 int count = primitiveProgress[unit].primitiveCount;
867                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
868                                 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
869
870                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
871
872                                 #if PERF_HUD
873                                         int64_t time = Timer::ticks();
874                                         vertexTime[threadIndex] += time - startTick;
875                                         startTick = time;
876                                 #endif
877
                                    // Stays 0 when rasterizer discard is enabled, because setup is skipped altogether.
878                                 int visible = 0;
879
880                                 if(!draw->setupState.rasterizerDiscard)
881                                 {
                                            // The unit index is passed as the 'batch' argument of the setup member function.
882                                         visible = (this->*setupPrimitives)(unit, count);
883                                 }
884
885                                 primitiveProgress[unit].visible = visible;
                                    // A positive reference count makes the batch eligible for pixel tasks in
                                    // findAvailableTasks(); finishRendering() decrements it once per pixel task.
886                                 primitiveProgress[unit].references = clusterCount;
887
888                                 #if PERF_HUD
889                                         setupTime[threadIndex] += Timer::ticks() - startTick;
890                                 #endif
891                         }
892                         break;
893                 case Task::PIXELS:
894                         {
895                                 int unit = task[threadIndex].primitiveUnit;
896                                 int visible = primitiveProgress[unit].visible;
897
                                    // The pixel routine is skipped when setup reported nothing visible.
898                                 if(visible > 0)
899                                 {
900                                         int cluster = task[threadIndex].pixelCluster;
901                                         Primitive *primitive = primitiveBatch[unit];
                                            // Looked up through the cluster's draw call index; findAvailableTasks() only queues
                                            // a pixel task when it equals primitiveProgress[unit].drawCall.
902                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
903                                         DrawData *data = draw->data;
904                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
905
906                                         pixelRoutine(primitive, visible, cluster, data);
907                                 }
908
                                    // Runs whether or not anything was drawn, so progress and reference counts always advance.
909                                 finishRendering(task[threadIndex]);
910
911                                 #if PERF_HUD
912                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
913                                 #endif
914                         }
915                         break;
                    // RESUME and SUSPEND carry no work of their own.
916                 case Task::RESUME:
917                         break;
918                 case Task::SUSPEND:
919                         break;
920                 default:
921                         ASSERT(false);
922                 }
923         }
924
925         void Renderer::synchronize()
926         {
                    // Acquires 'sync' with the sw::PUBLIC accessor and releases it again immediately.
                    // NOTE(review): presumably this blocks until in-flight draw calls have released 'sync'
                    // (finishRendering() calls sync->unlock() when a draw call completes) - confirm against Resource::lock().
927                 sync->lock(sw::PUBLIC);
928                 sync->unlock();
929         }
930
931         void Renderer::finishRendering(Task &pixelTask)
932         {
                    // Bookkeeping after a pixel task (called from executeTask() for every Task::PIXELS):
                    //  1. records how far the cluster has progressed through the draw call,
                    //  2. drops one reference on the primitive unit's batch,
                    //  3. when the last reference of the last batch goes away, resolves queries, releases
                    //     everything the draw call held and signals resumeApp,
                    //  4. marks the cluster as no longer executing.
933                 int unit = pixelTask.primitiveUnit;
934                 int cluster = pixelTask.pixelCluster;
935
936                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
937                 DrawData &data = *draw.data;
938                 int primitive = primitiveProgress[unit].firstPrimitive;
939                 int count = primitiveProgress[unit].primitiveCount;
                    // Index one past the last primitive of the batch that was just rendered.
940                 int processedPrimitives = primitive + count;
941
942                 pixelProgress[cluster].processedPrimitives = processedPrimitives;
943
                    // The cluster has rendered every batch of this draw call: advance it to the next
                    // draw call and restart its primitive counter.
944                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
945                 {
946                         pixelProgress[cluster].drawCall++;
947                         pixelProgress[cluster].processedPrimitives = 0;
948                 }
949
                    // executeTask() set this count to clusterCount after setup, one reference per cluster.
                    // NOTE(review): the '== 0' tests assume atomicDecrement() returns the decremented value - confirm.
950                 int ref = atomicDecrement(&primitiveProgress[unit].references);
951
952                 if(ref == 0)
953                 {
                            // All clusters are done with this batch. draw.references was initialised to the
                            // number of batches, (count + batch - 1) / batch, when the draw call was queued.
954                         ref = atomicDecrement(&draw.references);
955
956                         if(ref == 0)
957                         {
                                    // Last batch of the draw call: tear the draw call down.
                                    // Note that the loops below declare their own 'cluster', shadowing the local above.
958                                 #if PERF_PROFILE
959                                         for(int cluster = 0; cluster < clusterCount; cluster++)
960                                         {
961                                                 for(int i = 0; i < PERF_TIMERS; i++)
962                                                 {
963                                                         profiler.cycles[i] += data.cycles[i][cluster];
964                                                 }
965                                         }
966                                 #endif
967
968                                 if(draw.queries)
969                                 {
970                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
971                                         {
972                                                 Query *query = *q;
973
974                                                 switch(query->type)
975                                                 {
976                                                 case Query::FRAGMENTS_PASSED:
                                                            // Sum the per-cluster occlusion counters into the query result.
977                                                         for(int cluster = 0; cluster < clusterCount; cluster++)
978                                                         {
979                                                                 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
980                                                         }
981                                                         break;
982                                                 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
                                                            // Adds the end index of the batch handled by this task.
983                                                         atomicAdd((volatile int*)&query->data, processedPrimitives);
984                                                         break;
985                                                 default:
986                                                         break;
987                                                 }
988
                                                    // This draw call no longer contributes to the query.
989                                                 atomicDecrement(&query->reference);
990                                         }
991
                                            // The list is deleted here and the pointer cleared so it is not visited again.
992                                         delete draw.queries;
993                                         draw.queries = 0;
994                                 }
995
                                    // Release the buffers: render targets, depth and stencil were locked with
                                    // lockInternal()/lockStencil() when the draw call was set up.
996                                 for(int i = 0; i < RENDERTARGETS; i++)
997                                 {
998                                         if(draw.renderTarget[i])
999                                         {
1000                                                 draw.renderTarget[i]->unlockInternal();
1001                                         }
1002                                 }
1003
1004                                 if(draw.depthBuffer)
1005                                 {
1006                                         draw.depthBuffer->unlockInternal();
1007                                 }
1008
1009                                 if(draw.stencilBuffer)
1010                                 {
1011                                         draw.stencilBuffer->unlockStencil();
1012                                 }
1013
                                    // NOTE(review): the resources below are presumably locked during draw setup as well;
                                    // the matching lock calls are outside the visible part of the file.
1014                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
1015                                 {
1016                                         if(draw.texture[i])
1017                                         {
1018                                                 draw.texture[i]->unlock();
1019                                         }
1020                                 }
1021
1022                                 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
1023                                 {
1024                                         if(draw.vertexStream[i])
1025                                         {
1026                                                 draw.vertexStream[i]->unlock();
1027                                         }
1028                                 }
1029
1030                                 if(draw.indexBuffer)
1031                                 {
1032                                         draw.indexBuffer->unlock();
1033                                 }
1034
1035                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1036                                 {
1037                                         if(draw.pUniformBuffers[i])
1038                                         {
1039                                                 draw.pUniformBuffers[i]->unlock();
1040                                         }
1041                                         if(draw.vUniformBuffers[i])
1042                                         {
1043                                                 draw.vUniformBuffers[i]->unlock();
1044                                         }
1045                                 }
1046
1047                                 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1048                                 {
1049                                         if(draw.transformFeedbackBuffers[i])
1050                                         {
1051                                                 draw.transformFeedbackBuffers[i]->unlock();
1052                                         }
1053                                 }
1054
                                    // Release the three routines used by this draw call.
1055                                 draw.vertexRoutine->unbind();
1056                                 draw.setupRoutine->unbind();
1057                                 draw.pixelRoutine->unbind();
1058
1059                                 sync->unlock();
1060
                                    // NOTE(review): -1 presumably marks this DrawCall slot as free for reuse, and
                                    // resumeApp wakes a caller waiting for a free slot - confirm in draw().
1061                                 draw.references = -1;
1062                                 resumeApp->signal();
1063                         }
1064                 }
1065
                    // Lets findAvailableTasks() hand this cluster its next pixel task.
1066                 pixelProgress[cluster].executing = false;
1067         }
1068
1069         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1070         {
1071                 Triangle *triangle = triangleBatch[unit];
1072                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1073                 DrawData *data = draw->data;
1074                 VertexTask *task = vertexTask[thread];
1075
1076                 const void *indices = data->indices;
1077                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1078
1079                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1080                 {
1081                         task->vertexCache.clear();
1082                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1083                 }
1084
1085                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1086
1087                 switch(draw->drawType)
1088                 {
1089                 case DRAW_POINTLIST:
1090                         {
1091                                 unsigned int index = start;
1092
1093                                 for(unsigned int i = 0; i < triangleCount; i++)
1094                                 {
1095                                         batch[i][0] = index;
1096                                         batch[i][1] = index;
1097                                         batch[i][2] = index;
1098
1099                                         index += 1;
1100                                 }
1101                         }
1102                         break;
1103                 case DRAW_LINELIST:
1104                         {
1105                                 unsigned int index = 2 * start;
1106
1107                                 for(unsigned int i = 0; i < triangleCount; i++)
1108                                 {
1109                                         batch[i][0] = index + 0;
1110                                         batch[i][1] = index + 1;
1111                                         batch[i][2] = index + 1;
1112
1113                                         index += 2;
1114                                 }
1115                         }
1116                         break;
1117                 case DRAW_LINESTRIP:
1118                         {
1119                                 unsigned int index = start;
1120
1121                                 for(unsigned int i = 0; i < triangleCount; i++)
1122                                 {
1123                                         batch[i][0] = index + 0;
1124                                         batch[i][1] = index + 1;
1125                                         batch[i][2] = index + 1;
1126
1127                                         index += 1;
1128                                 }
1129                         }
1130                         break;
1131                 case DRAW_LINELOOP:
1132                         {
1133                                 unsigned int index = start;
1134
1135                                 for(unsigned int i = 0; i < triangleCount; i++)
1136                                 {
1137                                         batch[i][0] = (index + 0) % loop;
1138                                         batch[i][1] = (index + 1) % loop;
1139                                         batch[i][2] = (index + 1) % loop;
1140
1141                                         index += 1;
1142                                 }
1143                         }
1144                         break;
1145                 case DRAW_TRIANGLELIST:
1146                         {
1147                                 unsigned int index = 3 * start;
1148
1149                                 for(unsigned int i = 0; i < triangleCount; i++)
1150                                 {
1151                                         batch[i][0] = index + 0;
1152                                         batch[i][1] = index + 1;
1153                                         batch[i][2] = index + 2;
1154
1155                                         index += 3;
1156                                 }
1157                         }
1158                         break;
1159                 case DRAW_TRIANGLESTRIP:
1160                         {
1161                                 unsigned int index = start;
1162
1163                                 for(unsigned int i = 0; i < triangleCount; i++)
1164                                 {
1165                                         batch[i][0] = index + 0;
1166                                         batch[i][1] = index + (index & 1) + 1;
1167                                         batch[i][2] = index + (~index & 1) + 1;
1168
1169                                         index += 1;
1170                                 }
1171                         }
1172                         break;
1173                 case DRAW_TRIANGLEFAN:
1174                         {
1175                                 unsigned int index = start;
1176
1177                                 for(unsigned int i = 0; i < triangleCount; i++)
1178                                 {
1179                                         batch[i][0] = index + 1;
1180                                         batch[i][1] = index + 2;
1181                                         batch[i][2] = 0;
1182
1183                                         index += 1;
1184                                 }
1185                         }
1186                         break;
1187                 case DRAW_INDEXEDPOINTLIST8:
1188                         {
1189                                 const unsigned char *index = (const unsigned char*)indices + start;
1190
1191                                 for(unsigned int i = 0; i < triangleCount; i++)
1192                                 {
1193                                         batch[i][0] = *index;
1194                                         batch[i][1] = *index;
1195                                         batch[i][2] = *index;
1196
1197                                         index += 1;
1198                                 }
1199                         }
1200                         break;
1201                 case DRAW_INDEXEDPOINTLIST16:
1202                         {
1203                                 const unsigned short *index = (const unsigned short*)indices + start;
1204
1205                                 for(unsigned int i = 0; i < triangleCount; i++)
1206                                 {
1207                                         batch[i][0] = *index;
1208                                         batch[i][1] = *index;
1209                                         batch[i][2] = *index;
1210
1211                                         index += 1;
1212                                 }
1213                         }
1214                         break;
1215                 case DRAW_INDEXEDPOINTLIST32:
1216                         {
1217                                 const unsigned int *index = (const unsigned int*)indices + start;
1218
1219                                 for(unsigned int i = 0; i < triangleCount; i++)
1220                                 {
1221                                         batch[i][0] = *index;
1222                                         batch[i][1] = *index;
1223                                         batch[i][2] = *index;
1224
1225                                         index += 1;
1226                                 }
1227                         }
1228                         break;
1229                 case DRAW_INDEXEDLINELIST8:
1230                         {
1231                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1232
1233                                 for(unsigned int i = 0; i < triangleCount; i++)
1234                                 {
1235                                         batch[i][0] = index[0];
1236                                         batch[i][1] = index[1];
1237                                         batch[i][2] = index[1];
1238
1239                                         index += 2;
1240                                 }
1241                         }
1242                         break;
1243                 case DRAW_INDEXEDLINELIST16:
1244                         {
1245                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1246
1247                                 for(unsigned int i = 0; i < triangleCount; i++)
1248                                 {
1249                                         batch[i][0] = index[0];
1250                                         batch[i][1] = index[1];
1251                                         batch[i][2] = index[1];
1252
1253                                         index += 2;
1254                                 }
1255                         }
1256                         break;
1257                 case DRAW_INDEXEDLINELIST32:
1258                         {
1259                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1260
1261                                 for(unsigned int i = 0; i < triangleCount; i++)
1262                                 {
1263                                         batch[i][0] = index[0];
1264                                         batch[i][1] = index[1];
1265                                         batch[i][2] = index[1];
1266
1267                                         index += 2;
1268                                 }
1269                         }
1270                         break;
1271                 case DRAW_INDEXEDLINESTRIP8:
1272                         {
1273                                 const unsigned char *index = (const unsigned char*)indices + start;
1274
1275                                 for(unsigned int i = 0; i < triangleCount; i++)
1276                                 {
1277                                         batch[i][0] = index[0];
1278                                         batch[i][1] = index[1];
1279                                         batch[i][2] = index[1];
1280
1281                                         index += 1;
1282                                 }
1283                         }
1284                         break;
1285                 case DRAW_INDEXEDLINESTRIP16:
1286                         {
1287                                 const unsigned short *index = (const unsigned short*)indices + start;
1288
1289                                 for(unsigned int i = 0; i < triangleCount; i++)
1290                                 {
1291                                         batch[i][0] = index[0];
1292                                         batch[i][1] = index[1];
1293                                         batch[i][2] = index[1];
1294
1295                                         index += 1;
1296                                 }
1297                         }
1298                         break;
1299                 case DRAW_INDEXEDLINESTRIP32:
1300                         {
1301                                 const unsigned int *index = (const unsigned int*)indices + start;
1302
1303                                 for(unsigned int i = 0; i < triangleCount; i++)
1304                                 {
1305                                         batch[i][0] = index[0];
1306                                         batch[i][1] = index[1];
1307                                         batch[i][2] = index[1];
1308
1309                                         index += 1;
1310                                 }
1311                         }
1312                         break;
1313                 case DRAW_INDEXEDLINELOOP8:
1314                         {
1315                                 const unsigned char *index = (const unsigned char*)indices;
1316
1317                                 for(unsigned int i = 0; i < triangleCount; i++)
1318                                 {
1319                                         batch[i][0] = index[(start + i + 0) % loop];
1320                                         batch[i][1] = index[(start + i + 1) % loop];
1321                                         batch[i][2] = index[(start + i + 1) % loop];
1322                                 }
1323                         }
1324                         break;
1325                 case DRAW_INDEXEDLINELOOP16:
1326                         {
1327                                 const unsigned short *index = (const unsigned short*)indices;
1328
1329                                 for(unsigned int i = 0; i < triangleCount; i++)
1330                                 {
1331                                         batch[i][0] = index[(start + i + 0) % loop];
1332                                         batch[i][1] = index[(start + i + 1) % loop];
1333                                         batch[i][2] = index[(start + i + 1) % loop];
1334                                 }
1335                         }
1336                         break;
1337                 case DRAW_INDEXEDLINELOOP32:
1338                         {
1339                                 const unsigned int *index = (const unsigned int*)indices;
1340
1341                                 for(unsigned int i = 0; i < triangleCount; i++)
1342                                 {
1343                                         batch[i][0] = index[(start + i + 0) % loop];
1344                                         batch[i][1] = index[(start + i + 1) % loop];
1345                                         batch[i][2] = index[(start + i + 1) % loop];
1346                                 }
1347                         }
1348                         break;
1349                 case DRAW_INDEXEDTRIANGLELIST8:
1350                         {
1351                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1352
1353                                 for(unsigned int i = 0; i < triangleCount; i++)
1354                                 {
1355                                         batch[i][0] = index[0];
1356                                         batch[i][1] = index[1];
1357                                         batch[i][2] = index[2];
1358
1359                                         index += 3;
1360                                 }
1361                         }
1362                         break;
1363                 case DRAW_INDEXEDTRIANGLELIST16:
1364                         {
1365                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1366
1367                                 for(unsigned int i = 0; i < triangleCount; i++)
1368                                 {
1369                                         batch[i][0] = index[0];
1370                                         batch[i][1] = index[1];
1371                                         batch[i][2] = index[2];
1372
1373                                         index += 3;
1374                                 }
1375                         }
1376                         break;
1377                 case DRAW_INDEXEDTRIANGLELIST32:
1378                         {
1379                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1380
1381                                 for(unsigned int i = 0; i < triangleCount; i++)
1382                                 {
1383                                         batch[i][0] = index[0];
1384                                         batch[i][1] = index[1];
1385                                         batch[i][2] = index[2];
1386
1387                                         index += 3;
1388                                 }
1389                         }
1390                         break;
1391                 case DRAW_INDEXEDTRIANGLESTRIP8:
1392                         {
1393                                 const unsigned char *index = (const unsigned char*)indices + start;
1394
1395                                 for(unsigned int i = 0; i < triangleCount; i++)
1396                                 {
1397                                         batch[i][0] = index[0];
1398                                         batch[i][1] = index[((start + i) & 1) + 1];
1399                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1400
1401                                         index += 1;
1402                                 }
1403                         }
1404                         break;
1405                 case DRAW_INDEXEDTRIANGLESTRIP16:
1406                         {
1407                                 const unsigned short *index = (const unsigned short*)indices + start;
1408
1409                                 for(unsigned int i = 0; i < triangleCount; i++)
1410                                 {
1411                                         batch[i][0] = index[0];
1412                                         batch[i][1] = index[((start + i) & 1) + 1];
1413                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1414
1415                                         index += 1;
1416                                 }
1417                         }
1418                         break;
1419                 case DRAW_INDEXEDTRIANGLESTRIP32:
1420                         {
1421                                 const unsigned int *index = (const unsigned int*)indices + start;
1422
1423                                 for(unsigned int i = 0; i < triangleCount; i++)
1424                                 {
1425                                         batch[i][0] = index[0];
1426                                         batch[i][1] = index[((start + i) & 1) + 1];
1427                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1428
1429                                         index += 1;
1430                                 }
1431                         }
1432                         break;
1433                 case DRAW_INDEXEDTRIANGLEFAN8:
1434                         {
1435                                 const unsigned char *index = (const unsigned char*)indices;
1436
1437                                 for(unsigned int i = 0; i < triangleCount; i++)
1438                                 {
1439                                         batch[i][0] = index[start + i + 1];
1440                                         batch[i][1] = index[start + i + 2];
1441                                         batch[i][2] = index[0];
1442                                 }
1443                         }
1444                         break;
1445                 case DRAW_INDEXEDTRIANGLEFAN16:
1446                         {
1447                                 const unsigned short *index = (const unsigned short*)indices;
1448
1449                                 for(unsigned int i = 0; i < triangleCount; i++)
1450                                 {
1451                                         batch[i][0] = index[start + i + 1];
1452                                         batch[i][1] = index[start + i + 2];
1453                                         batch[i][2] = index[0];
1454                                 }
1455                         }
1456                         break;
1457                 case DRAW_INDEXEDTRIANGLEFAN32:
1458                         {
1459                                 const unsigned int *index = (const unsigned int*)indices;
1460
1461                                 for(unsigned int i = 0; i < triangleCount; i++)
1462                                 {
1463                                         batch[i][0] = index[start + i + 1];
1464                                         batch[i][1] = index[start + i + 2];
1465                                         batch[i][2] = index[0];
1466                                 }
1467                         }
1468                         break;
1469                 case DRAW_QUADLIST:
1470                         {
1471                                 unsigned int index = 4 * start / 2;
1472
1473                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1474                                 {
1475                                         batch[i+0][0] = index + 0;
1476                                         batch[i+0][1] = index + 1;
1477                                         batch[i+0][2] = index + 2;
1478
1479                                         batch[i+1][0] = index + 0;
1480                                         batch[i+1][1] = index + 2;
1481                                         batch[i+1][2] = index + 3;
1482
1483                                         index += 4;
1484                                 }
1485                         }
1486                         break;
1487                 default:
1488                         ASSERT(false);
1489                         return;
1490                 }
1491
1492                 task->primitiveStart = start;
1493                 task->vertexCount = triangleCount * 3;
1494                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1495         }
1496
1497         int Renderer::setupSolidTriangles(int unit, int count)
1498         {
1499                 Triangle *triangle = triangleBatch[unit];
1500                 Primitive *primitive = primitiveBatch[unit];
1501
1502                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1503                 SetupProcessor::State &state = draw.setupState;
1504                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1505
1506                 int ms = state.multiSample;
1507                 int pos = state.positionRegister;
1508                 const DrawData *data = draw.data;
1509                 int visible = 0;
1510
1511                 for(int i = 0; i < count; i++, triangle++)
1512                 {
1513                         Vertex &v0 = triangle->v0;
1514                         Vertex &v1 = triangle->v1;
1515                         Vertex &v2 = triangle->v2;
1516
1517                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1518                         {
1519                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1520
1521                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1522
1523                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1524                                 {
1525                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1526                                         {
1527                                                 continue;
1528                                         }
1529                                 }
1530
1531                                 if(setupRoutine(primitive, triangle, &polygon, data))
1532                                 {
1533                                         primitive += ms;
1534                                         visible++;
1535                                 }
1536                         }
1537                 }
1538
1539                 return visible;
1540         }
1541
1542         int Renderer::setupWireframeTriangle(int unit, int count)
1543         {
1544                 Triangle *triangle = triangleBatch[unit];
1545                 Primitive *primitive = primitiveBatch[unit];
1546                 int visible = 0;
1547
1548                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1549                 SetupProcessor::State &state = draw.setupState;
1550
1551                 const Vertex &v0 = triangle[0].v0;
1552                 const Vertex &v1 = triangle[0].v1;
1553                 const Vertex &v2 = triangle[0].v2;
1554
1555                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1556
1557                 if(state.cullMode == CULL_CLOCKWISE)
1558                 {
1559                         if(d >= 0) return 0;
1560                 }
1561                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1562                 {
1563                         if(d <= 0) return 0;
1564                 }
1565
1566                 // Copy attributes
1567                 triangle[1].v0 = v1;
1568                 triangle[1].v1 = v2;
1569                 triangle[2].v0 = v2;
1570                 triangle[2].v1 = v0;
1571
1572                 if(state.color[0][0].flat)   // FIXME
1573                 {
1574                         for(int i = 0; i < 2; i++)
1575                         {
1576                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1577                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1578                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1579                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1580                         }
1581                 }
1582
1583                 for(int i = 0; i < 3; i++)
1584                 {
1585                         if(setupLine(*primitive, *triangle, draw))
1586                         {
1587                                 primitive->area = 0.5f * d;
1588
1589                                 primitive++;
1590                                 visible++;
1591                         }
1592
1593                         triangle++;
1594                 }
1595
1596                 return visible;
1597         }
1598
1599         int Renderer::setupVertexTriangle(int unit, int count)
1600         {
1601                 Triangle *triangle = triangleBatch[unit];
1602                 Primitive *primitive = primitiveBatch[unit];
1603                 int visible = 0;
1604
1605                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1606                 SetupProcessor::State &state = draw.setupState;
1607
1608                 const Vertex &v0 = triangle[0].v0;
1609                 const Vertex &v1 = triangle[0].v1;
1610                 const Vertex &v2 = triangle[0].v2;
1611
1612                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1613
1614                 if(state.cullMode == CULL_CLOCKWISE)
1615                 {
1616                         if(d >= 0) return 0;
1617                 }
1618                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1619                 {
1620                         if(d <= 0) return 0;
1621                 }
1622
1623                 // Copy attributes
1624                 triangle[1].v0 = v1;
1625                 triangle[2].v0 = v2;
1626
1627                 for(int i = 0; i < 3; i++)
1628                 {
1629                         if(setupPoint(*primitive, *triangle, draw))
1630                         {
1631                                 primitive->area = 0.5f * d;
1632
1633                                 primitive++;
1634                                 visible++;
1635                         }
1636
1637                         triangle++;
1638                 }
1639
1640                 return visible;
1641         }
1642
1643         int Renderer::setupLines(int unit, int count)
1644         {
1645                 Triangle *triangle = triangleBatch[unit];
1646                 Primitive *primitive = primitiveBatch[unit];
1647                 int visible = 0;
1648
1649                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1650                 SetupProcessor::State &state = draw.setupState;
1651
1652                 int ms = state.multiSample;
1653
1654                 for(int i = 0; i < count; i++)
1655                 {
1656                         if(setupLine(*primitive, *triangle, draw))
1657                         {
1658                                 primitive += ms;
1659                                 visible++;
1660                         }
1661
1662                         triangle++;
1663                 }
1664
1665                 return visible;
1666         }
1667
1668         int Renderer::setupPoints(int unit, int count)
1669         {
1670                 Triangle *triangle = triangleBatch[unit];
1671                 Primitive *primitive = primitiveBatch[unit];
1672                 int visible = 0;
1673
1674                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1675                 SetupProcessor::State &state = draw.setupState;
1676
1677                 int ms = state.multiSample;
1678
1679                 for(int i = 0; i < count; i++)
1680                 {
1681                         if(setupPoint(*primitive, *triangle, draw))
1682                         {
1683                                 primitive += ms;
1684                                 visible++;
1685                         }
1686
1687                         triangle++;
1688                 }
1689
1690                 return visible;
1691         }
1692
1693         bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1694         {
1695                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1696                 const SetupProcessor::State &state = draw.setupState;
1697                 const DrawData &data = *draw.data;
1698
1699                 float lineWidth = data.lineWidth;
1700
1701                 Vertex &v0 = triangle.v0;
1702                 Vertex &v1 = triangle.v1;
1703
1704                 int pos = state.positionRegister;
1705
1706                 const float4 &P0 = v0.v[pos];
1707                 const float4 &P1 = v1.v[pos];
1708
1709                 if(P0.w <= 0 && P1.w <= 0)
1710                 {
1711                         return false;
1712                 }
1713
1714                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1715                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1716
1717                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1718                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1719
1720                 if(dx == 0 && dy == 0)
1721                 {
1722                         return false;
1723                 }
1724
1725                 if(false)   // Rectangle
1726                 {
1727                         float4 P[4];
1728                         int C[4];
1729
1730                         P[0] = P0;
1731                         P[1] = P1;
1732                         P[2] = P1;
1733                         P[3] = P0;
1734
1735                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1736
1737                         dx *= scale;
1738                         dy *= scale;
1739
1740                         float dx0w = dx * P0.w / W;
1741                         float dy0h = dy * P0.w / H;
1742                         float dx0h = dx * P0.w / H;
1743                         float dy0w = dy * P0.w / W;
1744
1745                         float dx1w = dx * P1.w / W;
1746                         float dy1h = dy * P1.w / H;
1747                         float dx1h = dx * P1.w / H;
1748                         float dy1w = dy * P1.w / W;
1749
1750                         P[0].x += -dy0w + -dx0w;
1751                         P[0].y += -dx0h + +dy0h;
1752                         C[0] = clipper->computeClipFlags(P[0]);
1753
1754                         P[1].x += -dy1w + +dx1w;
1755                         P[1].y += -dx1h + +dy1h;
1756                         C[1] = clipper->computeClipFlags(P[1]);
1757
1758                         P[2].x += +dy1w + +dx1w;
1759                         P[2].y += +dx1h + -dy1h;
1760                         C[2] = clipper->computeClipFlags(P[2]);
1761
1762                         P[3].x += +dy0w + -dx0w;
1763                         P[3].y += +dx0h + +dy0h;
1764                         C[3] = clipper->computeClipFlags(P[3]);
1765
1766                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1767                         {
1768                                 Polygon polygon(P, 4);
1769
1770                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1771
1772                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1773                                 {
1774                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1775                                         {
1776                                                 return false;
1777                                         }
1778                                 }
1779
1780                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1781                         }
1782                 }
1783                 else   // Diamond test convention
1784                 {
1785                         float4 P[8];
1786                         int C[8];
1787
1788                         P[0] = P0;
1789                         P[1] = P0;
1790                         P[2] = P0;
1791                         P[3] = P0;
1792                         P[4] = P1;
1793                         P[5] = P1;
1794                         P[6] = P1;
1795                         P[7] = P1;
1796
1797                         float dx0 = lineWidth * 0.5f * P0.w / W;
1798                         float dy0 = lineWidth * 0.5f * P0.w / H;
1799
1800                         float dx1 = lineWidth * 0.5f * P1.w / W;
1801                         float dy1 = lineWidth * 0.5f * P1.w / H;
1802
1803                         P[0].x += -dx0;
1804                         C[0] = clipper->computeClipFlags(P[0]);
1805
1806                         P[1].y += +dy0;
1807                         C[1] = clipper->computeClipFlags(P[1]);
1808
1809                         P[2].x += +dx0;
1810                         C[2] = clipper->computeClipFlags(P[2]);
1811
1812                         P[3].y += -dy0;
1813                         C[3] = clipper->computeClipFlags(P[3]);
1814
1815                         P[4].x += -dx1;
1816                         C[4] = clipper->computeClipFlags(P[4]);
1817
1818                         P[5].y += +dy1;
1819                         C[5] = clipper->computeClipFlags(P[5]);
1820
1821                         P[6].x += +dx1;
1822                         C[6] = clipper->computeClipFlags(P[6]);
1823
1824                         P[7].y += -dy1;
1825                         C[7] = clipper->computeClipFlags(P[7]);
1826
1827                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1828                         {
1829                                 float4 L[6];
1830
1831                                 if(dx > -dy)
1832                                 {
1833                                         if(dx > dy)   // Right
1834                                         {
1835                                                 L[0] = P[0];
1836                                                 L[1] = P[1];
1837                                                 L[2] = P[5];
1838                                                 L[3] = P[6];
1839                                                 L[4] = P[7];
1840                                                 L[5] = P[3];
1841                                         }
1842                                         else   // Down
1843                                         {
1844                                                 L[0] = P[0];
1845                                                 L[1] = P[4];
1846                                                 L[2] = P[5];
1847                                                 L[3] = P[6];
1848                                                 L[4] = P[2];
1849                                                 L[5] = P[3];
1850                                         }
1851                                 }
1852                                 else
1853                                 {
1854                                         if(dx > dy)   // Up
1855                                         {
1856                                                 L[0] = P[0];
1857                                                 L[1] = P[1];
1858                                                 L[2] = P[2];
1859                                                 L[3] = P[6];
1860                                                 L[4] = P[7];
1861                                                 L[5] = P[4];
1862                                         }
1863                                         else   // Left
1864                                         {
1865                                                 L[0] = P[1];
1866                                                 L[1] = P[2];
1867                                                 L[2] = P[3];
1868                                                 L[3] = P[7];
1869                                                 L[4] = P[4];
1870                                                 L[5] = P[5];
1871                                         }
1872                                 }
1873
1874                                 Polygon polygon(L, 6);
1875
1876                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1877
1878                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1879                                 {
1880                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1881                                         {
1882                                                 return false;
1883                                         }
1884                                 }
1885
1886                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1887                         }
1888                 }
1889
1890                 return false;
1891         }
1892
1893         bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1894         {
1895                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1896                 const SetupProcessor::State &state = draw.setupState;
1897                 const DrawData &data = *draw.data;
1898
1899                 Vertex &v = triangle.v0;
1900
1901                 float pSize;
1902
1903                 int pts = state.pointSizeRegister;
1904
1905                 if(state.pointSizeRegister != Unused)
1906                 {
1907                         pSize = v.v[pts].y;
1908                 }
1909                 else
1910                 {
1911                         pSize = data.point.pointSize[0];
1912                 }
1913
1914                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1915
1916                 float4 P[4];
1917                 int C[4];
1918
1919                 int pos = state.positionRegister;
1920
1921                 P[0] = v.v[pos];
1922                 P[1] = v.v[pos];
1923                 P[2] = v.v[pos];
1924                 P[3] = v.v[pos];
1925
1926                 const float X = pSize * P[0].w * data.halfPixelX[0];
1927                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1928
1929                 P[0].x -= X;
1930                 P[0].y += Y;
1931                 C[0] = clipper->computeClipFlags(P[0]);
1932
1933                 P[1].x += X;
1934                 P[1].y += Y;
1935                 C[1] = clipper->computeClipFlags(P[1]);
1936
1937                 P[2].x += X;
1938                 P[2].y -= Y;
1939                 C[2] = clipper->computeClipFlags(P[2]);
1940
1941                 P[3].x -= X;
1942                 P[3].y -= Y;
1943                 C[3] = clipper->computeClipFlags(P[3]);
1944
1945                 triangle.v1 = triangle.v0;
1946                 triangle.v2 = triangle.v0;
1947
1948                 triangle.v1.X += iround(16 * 0.5f * pSize);
1949                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1950
1951                 Polygon polygon(P, 4);
1952
1953                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1954                 {
1955                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1956
1957                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1958                         {
1959                                 if(!clipper->clip(polygon, clipFlagsOr, draw))
1960                                 {
1961                                         return false;
1962                                 }
1963                         }
1964
1965                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1966                 }
1967
1968                 return false;
1969         }
1970
1971         void Renderer::initializeThreads()
1972         {
1973                 unitCount = ceilPow2(threadCount);
1974                 clusterCount = ceilPow2(threadCount);
1975
1976                 for(int i = 0; i < unitCount; i++)
1977                 {
1978                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1979                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1980                 }
1981
1982                 for(int i = 0; i < threadCount; i++)
1983                 {
1984                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1985                         vertexTask[i]->vertexCache.drawCall = -1;
1986
1987                         task[i].type = Task::SUSPEND;
1988
1989                         resume[i] = new Event();
1990                         suspend[i] = new Event();
1991
1992                         Parameters parameters;
1993                         parameters.threadIndex = i;
1994                         parameters.renderer = this;
1995
1996                         exitThreads = false;
1997                         worker[i] = new Thread(threadFunction, &parameters);
1998
1999                         suspend[i]->wait();
2000                         suspend[i]->signal();
2001                 }
2002         }
2003
2004         void Renderer::terminateThreads()
2005         {
2006                 while(threadsAwake != 0)
2007                 {
2008                         Thread::sleep(1);
2009                 }
2010
2011                 for(int thread = 0; thread < threadCount; thread++)
2012                 {
2013                         if(worker[thread])
2014                         {
2015                                 exitThreads = true;
2016                                 resume[thread]->signal();
2017                                 worker[thread]->join();
2018
2019                                 delete worker[thread];
2020                                 worker[thread] = 0;
2021                                 delete resume[thread];
2022                                 resume[thread] = 0;
2023                                 delete suspend[thread];
2024                                 suspend[thread] = 0;
2025                         }
2026
2027                         deallocate(vertexTask[thread]);
2028                         vertexTask[thread] = 0;
2029                 }
2030
2031                 for(int i = 0; i < 16; i++)
2032                 {
2033                         deallocate(triangleBatch[i]);
2034                         triangleBatch[i] = 0;
2035
2036                         deallocate(primitiveBatch[i]);
2037                         primitiveBatch[i] = 0;
2038                 }
2039         }
2040
2041         void Renderer::loadConstants(const VertexShader *vertexShader)
2042         {
2043                 if(!vertexShader) return;
2044
2045                 size_t count = vertexShader->getLength();
2046
2047                 for(size_t i = 0; i < count; i++)
2048                 {
2049                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2050
2051                         if(instruction->opcode == Shader::OPCODE_DEF)
2052                         {
2053                                 int index = instruction->dst.index;
2054                                 float value[4];
2055
2056                                 value[0] = instruction->src[0].value[0];
2057                                 value[1] = instruction->src[0].value[1];
2058                                 value[2] = instruction->src[0].value[2];
2059                                 value[3] = instruction->src[0].value[3];
2060
2061                                 setVertexShaderConstantF(index, value);
2062                         }
2063                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2064                         {
2065                                 int index = instruction->dst.index;
2066                                 int integer[4];
2067
2068                                 integer[0] = instruction->src[0].integer[0];
2069                                 integer[1] = instruction->src[0].integer[1];
2070                                 integer[2] = instruction->src[0].integer[2];
2071                                 integer[3] = instruction->src[0].integer[3];
2072
2073                                 setVertexShaderConstantI(index, integer);
2074                         }
2075                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2076                         {
2077                                 int index = instruction->dst.index;
2078                                 int boolean = instruction->src[0].boolean[0];
2079
2080                                 setVertexShaderConstantB(index, &boolean);
2081                         }
2082                 }
2083         }
2084
2085         void Renderer::loadConstants(const PixelShader *pixelShader)
2086         {
2087                 if(!pixelShader) return;
2088
2089                 size_t count = pixelShader->getLength();
2090
2091                 for(size_t i = 0; i < count; i++)
2092                 {
2093                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2094
2095                         if(instruction->opcode == Shader::OPCODE_DEF)
2096                         {
2097                                 int index = instruction->dst.index;
2098                                 float value[4];
2099
2100                                 value[0] = instruction->src[0].value[0];
2101                                 value[1] = instruction->src[0].value[1];
2102                                 value[2] = instruction->src[0].value[2];
2103                                 value[3] = instruction->src[0].value[3];
2104
2105                                 setPixelShaderConstantF(index, value);
2106                         }
2107                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2108                         {
2109                                 int index = instruction->dst.index;
2110                                 int integer[4];
2111
2112                                 integer[0] = instruction->src[0].integer[0];
2113                                 integer[1] = instruction->src[0].integer[1];
2114                                 integer[2] = instruction->src[0].integer[2];
2115                                 integer[3] = instruction->src[0].integer[3];
2116
2117                                 setPixelShaderConstantI(index, integer);
2118                         }
2119                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2120                         {
2121                                 int index = instruction->dst.index;
2122                                 int boolean = instruction->src[0].boolean[0];
2123
2124                                 setPixelShaderConstantB(index, &boolean);
2125                         }
2126                 }
2127         }
2128
2129         void Renderer::setIndexBuffer(Resource *indexBuffer)
2130         {
2131                 context->indexBuffer = indexBuffer;
2132         }
2133
2134         void Renderer::setMultiSampleMask(unsigned int mask)
2135         {
2136                 context->sampleMask = mask;
2137         }
2138
2139         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2140         {
2141                 sw::transparencyAntialiasing = transparencyAntialiasing;
2142         }
2143
2144         bool Renderer::isReadWriteTexture(int sampler)
2145         {
2146                 for(int index = 0; index < RENDERTARGETS; index++)
2147                 {
2148                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2149                         {
2150                                 return true;
2151                         }
2152                 }
2153
2154                 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2155                 {
2156                         return true;
2157                 }
2158
2159                 return false;
2160         }
2161
2162         void Renderer::updateClipper()
2163         {
2164                 if(updateClipPlanes)
2165                 {
2166                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2167                         {
2168                                 const Matrix &scissorWorld = getViewTransform();
2169
2170                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2171                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2172                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2173                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2174                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2175                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2176                         }
2177                         else   // User plane in clip space
2178                         {
2179                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2180                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2181                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2182                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2183                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2184                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2185                         }
2186
2187                         updateClipPlanes = false;
2188                 }
2189         }
2190
2191         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2192         {
2193                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2194
2195                 context->texture[sampler] = resource;
2196         }
2197
2198         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2199         {
2200                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2201
2202                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2203         }
2204
2205         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2206         {
2207                 if(type == SAMPLER_PIXEL)
2208                 {
2209                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2210                 }
2211                 else
2212                 {
2213                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2214                 }
2215         }
2216
2217         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2218         {
2219                 if(type == SAMPLER_PIXEL)
2220                 {
2221                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2222                 }
2223                 else
2224                 {
2225                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2226                 }
2227         }
2228
2229         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2230         {
2231                 if(type == SAMPLER_PIXEL)
2232                 {
2233                         PixelProcessor::setGatherEnable(sampler, enable);
2234                 }
2235                 else
2236                 {
2237                         VertexProcessor::setGatherEnable(sampler, enable);
2238                 }
2239         }
2240
2241         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2242         {
2243                 if(type == SAMPLER_PIXEL)
2244                 {
2245                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2246                 }
2247                 else
2248                 {
2249                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2250                 }
2251         }
2252
2253         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2254         {
2255                 if(type == SAMPLER_PIXEL)
2256                 {
2257                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2258                 }
2259                 else
2260                 {
2261                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2262                 }
2263         }
2264
2265         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2266         {
2267                 if(type == SAMPLER_PIXEL)
2268                 {
2269                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2270                 }
2271                 else
2272                 {
2273                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2274                 }
2275         }
2276
2277         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2278         {
2279                 if(type == SAMPLER_PIXEL)
2280                 {
2281                         PixelProcessor::setReadSRGB(sampler, sRGB);
2282                 }
2283                 else
2284                 {
2285                         VertexProcessor::setReadSRGB(sampler, sRGB);
2286                 }
2287         }
2288
2289         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2290         {
2291                 if(type == SAMPLER_PIXEL)
2292                 {
2293                         PixelProcessor::setMipmapLOD(sampler, bias);
2294                 }
2295                 else
2296                 {
2297                         VertexProcessor::setMipmapLOD(sampler, bias);
2298                 }
2299         }
2300
2301         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2302         {
2303                 if(type == SAMPLER_PIXEL)
2304                 {
2305                         PixelProcessor::setBorderColor(sampler, borderColor);
2306                 }
2307                 else
2308                 {
2309                         VertexProcessor::setBorderColor(sampler, borderColor);
2310                 }
2311         }
2312
2313         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2314         {
2315                 if(type == SAMPLER_PIXEL)
2316                 {
2317                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2318                 }
2319                 else
2320                 {
2321                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2322                 }
2323         }
2324
2325         void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
2326         {
2327                 if(type == SAMPLER_PIXEL)
2328                 {
2329                         PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2330                 }
2331                 else
2332                 {
2333                         VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2334                 }
2335         }
2336
2337         void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2338         {
2339                 if(type == SAMPLER_PIXEL)
2340                 {
2341                         PixelProcessor::setSwizzleR(sampler, swizzleR);
2342                 }
2343                 else
2344                 {
2345                         VertexProcessor::setSwizzleR(sampler, swizzleR);
2346                 }
2347         }
2348
2349         void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2350         {
2351                 if(type == SAMPLER_PIXEL)
2352                 {
2353                         PixelProcessor::setSwizzleG(sampler, swizzleG);
2354                 }
2355                 else
2356                 {
2357                         VertexProcessor::setSwizzleG(sampler, swizzleG);
2358                 }
2359         }
2360
2361         void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2362         {
2363                 if(type == SAMPLER_PIXEL)
2364                 {
2365                         PixelProcessor::setSwizzleB(sampler, swizzleB);
2366                 }
2367                 else
2368                 {
2369                         VertexProcessor::setSwizzleB(sampler, swizzleB);
2370                 }
2371         }
2372
2373         void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2374         {
2375                 if(type == SAMPLER_PIXEL)
2376                 {
2377                         PixelProcessor::setSwizzleA(sampler, swizzleA);
2378                 }
2379                 else
2380                 {
2381                         VertexProcessor::setSwizzleA(sampler, swizzleA);
2382                 }
2383         }
2384
2385         void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2386         {
2387                 if(type == SAMPLER_PIXEL)
2388                 {
2389                         PixelProcessor::setBaseLevel(sampler, baseLevel);
2390                 }
2391                 else
2392                 {
2393                         VertexProcessor::setBaseLevel(sampler, baseLevel);
2394                 }
2395         }
2396
2397         void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2398         {
2399                 if(type == SAMPLER_PIXEL)
2400                 {
2401                         PixelProcessor::setMaxLevel(sampler, maxLevel);
2402                 }
2403                 else
2404                 {
2405                         VertexProcessor::setMaxLevel(sampler, maxLevel);
2406                 }
2407         }
2408
2409         void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2410         {
2411                 if(type == SAMPLER_PIXEL)
2412                 {
2413                         PixelProcessor::setMinLod(sampler, minLod);
2414                 }
2415                 else
2416                 {
2417                         VertexProcessor::setMinLod(sampler, minLod);
2418                 }
2419         }
2420
2421         void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2422         {
2423                 if(type == SAMPLER_PIXEL)
2424                 {
2425                         PixelProcessor::setMaxLod(sampler, maxLod);
2426                 }
2427                 else
2428                 {
2429                         VertexProcessor::setMaxLod(sampler, maxLod);
2430                 }
2431         }
2432
2433         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2434         {
2435                 context->setPointSpriteEnable(pointSpriteEnable);
2436         }
2437
2438         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2439         {
2440                 context->setPointScaleEnable(pointScaleEnable);
2441         }
2442
2443         void Renderer::setLineWidth(float width)
2444         {
2445                 context->lineWidth = width;
2446         }
2447
2448         void Renderer::setDepthBias(float bias)
2449         {
2450                 depthBias = bias;
2451         }
2452
2453         void Renderer::setSlopeDepthBias(float slopeBias)
2454         {
2455                 slopeDepthBias = slopeBias;
2456         }
2457
2458         void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2459         {
2460                 context->rasterizerDiscard = rasterizerDiscard;
2461         }
2462
2463         void Renderer::setPixelShader(const PixelShader *shader)
2464         {
2465                 context->pixelShader = shader;
2466
2467                 loadConstants(shader);
2468         }
2469
2470         void Renderer::setVertexShader(const VertexShader *shader)
2471         {
2472                 context->vertexShader = shader;
2473
2474                 loadConstants(shader);
2475         }
2476
2477         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2478         {
2479                 for(int i = 0; i < DRAW_COUNT; i++)
2480                 {
2481                         if(drawCall[i]->psDirtyConstF < index + count)
2482                         {
2483                                 drawCall[i]->psDirtyConstF = index + count;
2484                         }
2485                 }
2486
2487                 for(int i = 0; i < count; i++)
2488                 {
2489                         PixelProcessor::setFloatConstant(index + i, value);
2490                         value += 4;
2491                 }
2492         }
2493
2494         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2495         {
2496                 for(int i = 0; i < DRAW_COUNT; i++)
2497                 {
2498                         if(drawCall[i]->psDirtyConstI < index + count)
2499                         {
2500                                 drawCall[i]->psDirtyConstI = index + count;
2501                         }
2502                 }
2503
2504                 for(int i = 0; i < count; i++)
2505                 {
2506                         PixelProcessor::setIntegerConstant(index + i, value);
2507                         value += 4;
2508                 }
2509         }
2510
2511         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2512         {
2513                 for(int i = 0; i < DRAW_COUNT; i++)
2514                 {
2515                         if(drawCall[i]->psDirtyConstB < index + count)
2516                         {
2517                                 drawCall[i]->psDirtyConstB = index + count;
2518                         }
2519                 }
2520
2521                 for(int i = 0; i < count; i++)
2522                 {
2523                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2524                         boolean++;
2525                 }
2526         }
2527
2528         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2529         {
2530                 for(int i = 0; i < DRAW_COUNT; i++)
2531                 {
2532                         if(drawCall[i]->vsDirtyConstF < index + count)
2533                         {
2534                                 drawCall[i]->vsDirtyConstF = index + count;
2535                         }
2536                 }
2537
2538                 for(int i = 0; i < count; i++)
2539                 {
2540                         VertexProcessor::setFloatConstant(index + i, value);
2541                         value += 4;
2542                 }
2543         }
2544
2545         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2546         {
2547                 for(int i = 0; i < DRAW_COUNT; i++)
2548                 {
2549                         if(drawCall[i]->vsDirtyConstI < index + count)
2550                         {
2551                                 drawCall[i]->vsDirtyConstI = index + count;
2552                         }
2553                 }
2554
2555                 for(int i = 0; i < count; i++)
2556                 {
2557                         VertexProcessor::setIntegerConstant(index + i, value);
2558                         value += 4;
2559                 }
2560         }
2561
2562         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2563         {
2564                 for(int i = 0; i < DRAW_COUNT; i++)
2565                 {
2566                         if(drawCall[i]->vsDirtyConstB < index + count)
2567                         {
2568                                 drawCall[i]->vsDirtyConstB = index + count;
2569                         }
2570                 }
2571
2572                 for(int i = 0; i < count; i++)
2573                 {
2574                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2575                         boolean++;
2576                 }
2577         }
2578
2579         void Renderer::setModelMatrix(const Matrix &M, int i)
2580         {
2581                 VertexProcessor::setModelMatrix(M, i);
2582         }
2583
2584         void Renderer::setViewMatrix(const Matrix &V)
2585         {
2586                 VertexProcessor::setViewMatrix(V);
2587                 updateClipPlanes = true;
2588         }
2589
2590         void Renderer::setBaseMatrix(const Matrix &B)
2591         {
2592                 VertexProcessor::setBaseMatrix(B);
2593                 updateClipPlanes = true;
2594         }
2595
2596         void Renderer::setProjectionMatrix(const Matrix &P)
2597         {
2598                 VertexProcessor::setProjectionMatrix(P);
2599                 updateClipPlanes = true;
2600         }
2601
2602         void Renderer::addQuery(Query *query)
2603         {
2604                 queries.push_back(query);
2605         }
2606
2607         void Renderer::removeQuery(Query *query)
2608         {
2609                 queries.remove(query);
2610         }
2611
2612         #if PERF_HUD
2613                 int Renderer::getThreadCount()
2614                 {
2615                         return threadCount;
2616                 }
2617
2618                 int64_t Renderer::getVertexTime(int thread)
2619                 {
2620                         return vertexTime[thread];
2621                 }
2622
2623                 int64_t Renderer::getSetupTime(int thread)
2624                 {
2625                         return setupTime[thread];
2626                 }
2627
2628                 int64_t Renderer::getPixelTime(int thread)
2629                 {
2630                         return pixelTime[thread];
2631                 }
2632
2633                 void Renderer::resetTimers()
2634                 {
2635                         for(int thread = 0; thread < threadCount; thread++)
2636                         {
2637                                 vertexTime[thread] = 0;
2638                                 setupTime[thread] = 0;
2639                                 pixelTime[thread] = 0;
2640                         }
2641                 }
2642         #endif
2643
2644         void Renderer::setViewport(const Viewport &viewport)
2645         {
2646                 this->viewport = viewport;
2647         }
2648
2649         void Renderer::setScissor(const Rect &scissor)
2650         {
2651                 this->scissor = scissor;
2652         }
2653
2654         void Renderer::setClipFlags(int flags)
2655         {
2656                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2657         }
2658
2659         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2660         {
2661                 if(index < MAX_CLIP_PLANES)
2662                 {
2663                         userPlane[index] = plane;
2664                 }
2665                 else ASSERT(false);
2666
2667                 updateClipPlanes = true;
2668         }
2669
2670         void Renderer::updateConfiguration(bool initialUpdate)
2671         {
2672                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2673
2674                 if(newConfiguration || initialUpdate)
2675                 {
2676                         terminateThreads();
2677
2678                         SwiftConfig::Configuration configuration = {};
2679                         swiftConfig->getConfiguration(configuration);
2680
2681                         precacheVertex = !newConfiguration && configuration.precache;
2682                         precacheSetup = !newConfiguration && configuration.precache;
2683                         precachePixel = !newConfiguration && configuration.precache;
2684
2685                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2686                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2687                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2688
2689                         switch(configuration.textureSampleQuality)
2690                         {
2691                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2692                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2693                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2694                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2695                         }
2696
2697                         switch(configuration.mipmapQuality)
2698                         {
2699                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2700                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2701                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2702                         }
2703
2704                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2705
2706                         switch(configuration.transcendentalPrecision)
2707                         {
2708                         case 0:
2709                                 logPrecision = APPROXIMATE;
2710                                 expPrecision = APPROXIMATE;
2711                                 rcpPrecision = APPROXIMATE;
2712                                 rsqPrecision = APPROXIMATE;
2713                                 break;
2714                         case 1:
2715                                 logPrecision = PARTIAL;
2716                                 expPrecision = PARTIAL;
2717                                 rcpPrecision = PARTIAL;
2718                                 rsqPrecision = PARTIAL;
2719                                 break;
2720                         case 2:
2721                                 logPrecision = ACCURATE;
2722                                 expPrecision = ACCURATE;
2723                                 rcpPrecision = ACCURATE;
2724                                 rsqPrecision = ACCURATE;
2725                                 break;
2726                         case 3:
2727                                 logPrecision = WHQL;
2728                                 expPrecision = WHQL;
2729                                 rcpPrecision = WHQL;
2730                                 rsqPrecision = WHQL;
2731                                 break;
2732                         case 4:
2733                                 logPrecision = IEEE;
2734                                 expPrecision = IEEE;
2735                                 rcpPrecision = IEEE;
2736                                 rsqPrecision = IEEE;
2737                                 break;
2738                         default:
2739                                 logPrecision = ACCURATE;
2740                                 expPrecision = ACCURATE;
2741                                 rcpPrecision = ACCURATE;
2742                                 rsqPrecision = ACCURATE;
2743                                 break;
2744                         }
2745
2746                         switch(configuration.transparencyAntialiasing)
2747                         {
2748                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2749                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2750                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2751                         }
2752
2753                         switch(configuration.threadCount)
2754                         {
2755                         case -1: threadCount = CPUID::coreCount();        break;
2756                         case 0:  threadCount = CPUID::processAffinity();  break;
2757                         default: threadCount = configuration.threadCount; break;
2758                         }
2759
2760                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2761                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2762                         CPUID::setEnableSSE3(configuration.enableSSE3);
2763                         CPUID::setEnableSSE2(configuration.enableSSE2);
2764                         CPUID::setEnableSSE(configuration.enableSSE);
2765
2766                         for(int pass = 0; pass < 10; pass++)
2767                         {
2768                                 optimization[pass] = configuration.optimization[pass];
2769                         }
2770
2771                         forceWindowed = configuration.forceWindowed;
2772                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2773                         postBlendSRGB = configuration.postBlendSRGB;
2774                         exactColorRounding = configuration.exactColorRounding;
2775                         forceClearRegisters = configuration.forceClearRegisters;
2776
2777                 #ifndef NDEBUG
2778                         minPrimitives = configuration.minPrimitives;
2779                         maxPrimitives = configuration.maxPrimitives;
2780                 #endif
2781                 }
2782
2783                 if(!initialUpdate && !worker[0])
2784                 {
2785                         initializeThreads();
2786                 }
2787         }
2788 }