OSDN Git Service

Make Blitter part of Renderer.
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Renderer.hpp"
16
17 #include "Clipper.hpp"
18 #include "Math.hpp"
19 #include "FrameBuffer.hpp"
20 #include "Timer.hpp"
21 #include "Surface.hpp"
22 #include "Half.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
27 #include "CPUID.hpp"
28 #include "Memory.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
31 #include "Debug.hpp"
32 #include "Reactor/Reactor.hpp"
33
34 #undef max
35
// Handed to the SwiftConfig constructor when a Renderer is created.
36 bool disableServer = true;
37
38 #ifndef NDEBUG
// Debug-build bounds: Renderer::draw() returns immediately for any call whose
// primitive count is below minPrimitives or above maxPrimitives.
39 unsigned int minPrimitives = 1;
40 unsigned int maxPrimitives = 1 << 21;
41 #endif
42
43 namespace sw
44 {
// Settings defined in other translation units. The first six are overwritten
// from the Conventions argument each time a Renderer is constructed.
45         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47         extern bool booleanFaceRegister;
48         extern bool fullPixelPositionRegister;
49         extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
50         extern bool secondaryColor;             // Specular lighting is applied after texturing
51
52         extern bool forceWindowed;
           // When set, Renderer::draw() negates the depth range and uses 1 - near.
53         extern bool complementaryDepthBuffer;
54         extern bool postBlendSRGB;
           // Overwritten from the Renderer constructor's exactColorRounding argument.
55         extern bool exactColorRounding;
56         extern TransparencyAntialiasing transparencyAntialiasing;
57         extern bool forceClearRegisters;
58
59         extern bool precacheVertex;
60         extern bool precacheSetup;
61         extern bool precachePixel;
62
           // Tunables defined here. Renderer::draw() divides batchSize by the
           // multisample count to get the primitives per batch, compares threadCount
           // against 1 (debug builds) to decide whether to run the task loop on the
           // calling thread, and iterates clusterCount per-cluster counters.
           // NOTE(review): presumably these are rewritten by updateConfiguration() — confirm.
63         int batchSize = 128;
64         int threadCount = 1;
65         int unitCount = 1;
66         int clusterCount = 1;
67
           // Precision selectors for transcendental operations; no use is visible in this chunk.
68         TranscendentalPrecision logPrecision = ACCURATE;
69         TranscendentalPrecision expPrecision = ACCURATE;
70         TranscendentalPrecision rcpPrecision = ACCURATE;
71         TranscendentalPrecision rsqPrecision = ACCURATE;
72         bool perspectiveCorrection = true;
73
// Pairs a Renderer with a thread index.
// NOTE(review): presumably the argument bundle for worker thread entry points;
// no use is visible in this chunk — confirm.
74         struct Parameters
75         {
76                 Renderer *renderer;
77                 int threadIndex;
78         };
79
80         DrawCall::DrawCall()
81         {
82                 queries = 0;
83
84                 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
85                 vsDirtyConstI = 16;
86                 vsDirtyConstB = 16;
87
88                 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
89                 psDirtyConstI = 16;
90                 psDirtyConstB = 16;
91
92                 references = -1;
93
94                 data = (DrawData*)allocate(sizeof(DrawData));
95                 data->constants = &constants;
96         }
97
98         DrawCall::~DrawCall()
99         {
100                 delete queries;
101
102                 deallocate(data);
103         }
104
105         Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
106         {
107                 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
108                 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
109                 sw::booleanFaceRegister = conventions.booleanFaceRegister;
110                 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
111                 sw::leadingVertexFirst = conventions.leadingVertexFirst;
112                 sw::secondaryColor = conventions.secondaryColor;
113                 sw::exactColorRounding = exactColorRounding;
114
115                 setRenderTarget(0, 0);
116                 clipper = new Clipper(symmetricNormalizedDepth);
117                 blitter = new Blitter;
118
119                 updateViewMatrix = true;
120                 updateBaseMatrix = true;
121                 updateProjectionMatrix = true;
122                 updateClipPlanes = true;
123
124                 #if PERF_HUD
125                         resetTimers();
126                 #endif
127
128                 for(int i = 0; i < 16; i++)
129                 {
130                         vertexTask[i] = 0;
131
132                         worker[i] = 0;
133                         resume[i] = 0;
134                         suspend[i] = 0;
135                 }
136
137                 threadsAwake = 0;
138                 resumeApp = new Event();
139
140                 currentDraw = 0;
141                 nextDraw = 0;
142
143                 qHead = 0;
144                 qSize = 0;
145
146                 for(int i = 0; i < 16; i++)
147                 {
148                         triangleBatch[i] = 0;
149                         primitiveBatch[i] = 0;
150                 }
151
152                 for(int draw = 0; draw < DRAW_COUNT; draw++)
153                 {
154                         drawCall[draw] = new DrawCall();
155                         drawList[draw] = drawCall[draw];
156                 }
157
158                 for(int unit = 0; unit < 16; unit++)
159                 {
160                         primitiveProgress[unit].init();
161                 }
162
163                 for(int cluster = 0; cluster < 16; cluster++)
164                 {
165                         pixelProgress[cluster].init();
166                 }
167
168                 clipFlags = 0;
169
170                 swiftConfig = new SwiftConfig(disableServer);
171                 updateConfiguration(true);
172
173                 sync = new Resource(0);
174         }
175
176         Renderer::~Renderer()
177         {
178                 sync->destruct();
179
180                 delete clipper;
181                 clipper = nullptr;
182
183                 delete blitter;
184                 blitter = nullptr;
185
186                 terminateThreads();
187                 delete resumeApp;
188
189                 for(int draw = 0; draw < DRAW_COUNT; draw++)
190                 {
191                         delete drawCall[draw];
192                 }
193
194                 delete swiftConfig;
195         }
196
197         // This object has to be mem aligned
198         void* Renderer::operator new(size_t size)
199         {
200                 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
201                 return sw::allocate(sizeof(Renderer), 16);
202         }
203
// Counterpart of Renderer::operator new: hands the storage back to sw::deallocate().
204         void Renderer::operator delete(void * mem)
205         {
206                 sw::deallocate(mem);
207         }
208
// Submits a draw of `count` primitives of type `drawType`.
//
// For every super-sample pass q in [0, context->getSuperSampleCount()) whose
// sample mask slice is non-zero, this claims a free DrawCall (one whose
// `references` is -1), captures into it the routines, shader constants, vertex
// inputs, textures, viewport, render target and scissor state, sets its
// reference count to the number of batches, advances nextDraw and then either
// runs the task loop itself (debug builds with a single thread) or wakes thread 0.
//
// indexOffset - byte offset added to the locked index buffer pointer.
// count       - number of primitives to draw.
// update      - when true, processor states and routines are re-derived; they are
//               also re-derived whenever the multisample mask changes.
209         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
210         {
                    // Debug builds only: drop draws whose primitive count is outside
                    // [minPrimitives, maxPrimitives].
211                 #ifndef NDEBUG
212                         if(count < minPrimitives || count > maxPrimitives)
213                         {
214                                 return;
215                         }
216                 #endif
217
218                 context->drawType = drawType;
219
220                 updateConfiguration();
221                 updateClipper();
222
223                 int ss = context->getSuperSampleCount();
224                 int ms = context->getMultiSampleCount();
225
226                 for(int q = 0; q < ss; q++)
227                 {
                            // Remember the previous mask, then select the ms-bit wide slice of
                            // sampleMask that belongs to pass q.
                            // NOTE(review): the shift by (32 - ms) presumes 1 <= ms <= 32 — confirm
                            // getMultiSampleCount() can never return 0.
228                         unsigned int oldMultiSampleMask = context->multiSampleMask;
229                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
230
                            // No sample enabled for this pass: skip it. The mask assigned above stays in place.
231                         if(!context->multiSampleMask)
232                         {
233                                 continue;
234                         }
235
                            // NOTE(review): no matching unlock is visible in this function, including on
                            // the early return in the fill-mode default case below; presumably the lock
                            // is released when the draw finishes — confirm.
236                         sync->lock(sw::PRIVATE);
237
                            // Re-derive state and routines on request, or when the mask differs from the previous one.
238                         if(update || oldMultiSampleMask != context->multiSampleMask)
239                         {
240                                 vertexState = VertexProcessor::update(drawType);
241                                 setupState = SetupProcessor::update();
242                                 pixelState = PixelProcessor::update();
243
244                                 vertexRoutine = VertexProcessor::routine(vertexState);
245                                 setupRoutine = SetupProcessor::routine(setupState);
246                                 pixelRoutine = PixelProcessor::routine(pixelState);
247                         }
248
                            // Primitives per batch; wireframe and vertex fill modes below force it to 1.
249                         int batch = batchSize / ms;
250
                            // Member function used to set up primitives, chosen by primitive type and fill mode.
251                         int (Renderer::*setupPrimitives)(int batch, int count);
252
253                         if(context->isDrawTriangle())
254                         {
255                                 switch(context->fillMode)
256                                 {
257                                 case FILL_SOLID:
258                                         setupPrimitives = &Renderer::setupSolidTriangles;
259                                         break;
260                                 case FILL_WIREFRAME:
261                                         setupPrimitives = &Renderer::setupWireframeTriangle;
262                                         batch = 1;
263                                         break;
264                                 case FILL_VERTEX:
265                                         setupPrimitives = &Renderer::setupVertexTriangle;
266                                         batch = 1;
267                                         break;
268                                 default:
269                                         ASSERT(false);
270                                         return;
271                                 }
272                         }
273                         else if(context->isDrawLine())
274                         {
275                                 setupPrimitives = &Renderer::setupLines;
276                         }
277                         else   // Point draw
278                         {
279                                 setupPrimitives = &Renderer::setupPoints;
280                         }
281
282                         DrawCall *draw = 0;
283
                            // Claim the first draw call whose references is -1 and place it in the draw
                            // list at nextDraw. If none is free, block on resumeApp and scan again.
284                         do
285                         {
286                                 for(int i = 0; i < DRAW_COUNT; i++)
287                                 {
288                                         if(drawCall[i]->references == -1)
289                                         {
290                                                 draw = drawCall[i];
291                                                 drawList[nextDraw % DRAW_COUNT] = draw;
292
293                                                 break;
294                                         }
295                                 }
296
297                                 if(!draw)
298                                 {
299                                         resumeApp->wait();
300                                 }
301                         }
302                         while(!draw);
303
304                         DrawData *data = draw->data;
305
                            // Attach the active queries to this draw, incrementing each one's reference
                            // count. TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries are attached only when
                            // the vertex state has both transform feedback and its query enabled.
306                         if(queries.size() != 0)
307                         {
308                                 draw->queries = new std::list<Query*>();
309                                 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
310                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
311                                 {
312                                         Query* q = *query;
313                                         if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
314                                         {
315                                                 atomicIncrement(&(q->reference));
316                                                 draw->queries->push_back(q);
317                                         }
318                                 }
319                         }
320
321                         draw->drawType = drawType;
322                         draw->batchSize = batch;
323
                            // Each routine is bound before being stored in the draw call.
                            // NOTE(review): presumably bind() keeps the routine alive until the draw
                            // completes — confirm.
324                         vertexRoutine->bind();
325                         setupRoutine->bind();
326                         pixelRoutine->bind();
327
328                         draw->vertexRoutine = vertexRoutine;
329                         draw->setupRoutine = setupRoutine;
330                         draw->pixelRoutine = pixelRoutine;
331                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
332                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
333                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
334                         draw->setupPrimitives = setupPrimitives;
335                         draw->setupState = setupState;
336
                            // Capture every vertex input and lock the resources that are present.
337                         for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
338                         {
339                                 draw->vertexStream[i] = context->input[i].resource;
340                                 data->input[i] = context->input[i].buffer;
341                                 data->stride[i] = context->input[i].stride;
342
343                                 if(draw->vertexStream[i])
344                                 {
345                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
346                                 }
347                         }
348
                            // indexOffset is applied in bytes (the locked pointer is cast to unsigned char*).
                            // data->indices is left untouched when no index buffer is bound.
349                         if(context->indexBuffer)
350                         {
351                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
352                         }
353
354                         draw->indexBuffer = context->indexBuffer;
355
                            // Reset every texture slot, then fill in the ones the pixel state samples from.
356                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
357                         {
358                                 draw->texture[sampler] = 0;
359                         }
360
361                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
362                         {
363                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
364                                 {
365                                         draw->texture[sampler] = context->texture[sampler];
366                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texture is both read and written, use the same read/write lock as render targets
367
368                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
369                                 }
370                         }
371
                            // Pixel shader constants: copy the leading psDirtyConst* entries of each
                            // register file and reset the dirty counts. The cW copy is capped at 8 entries.
372                         if(context->pixelShader)
373                         {
374                                 if(draw->psDirtyConstF)
375                                 {
376                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
377                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
378                                         draw->psDirtyConstF = 0;
379                                 }
380
381                                 if(draw->psDirtyConstI)
382                                 {
383                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
384                                         draw->psDirtyConstI = 0;
385                                 }
386
387                                 if(draw->psDirtyConstB)
388                                 {
389                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
390                                         draw->psDirtyConstB = 0;
391                                 }
392
393                                 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
394                         }
395                         else
396                         {
397                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
398                                 {
399                                         draw->pUniformBuffers[i] = nullptr;
400                                 }
401                         }
402
                            // For pixel shader versions up to 0x0104, copy texture stage uniforms. Without
                            // a pixel shader the copy stops at the first disabled stage.
403                         if(context->pixelShaderVersion() <= 0x0104)
404                         {
405                                 for(int stage = 0; stage < 8; stage++)
406                                 {
407                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
408                                         {
409                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
410                                         }
411                                         else break;
412                                 }
413                         }
414
415                         if(context->vertexShader)
416                         {
                                    // Vertex shaders of version 0x0300 and up may sample textures; their
                                    // slots follow the pixel sampler slots (offset TEXTURE_IMAGE_UNITS).
417                                 if(context->vertexShader->getVersion() >= 0x0300)
418                                 {
419                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
420                                         {
421                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
422                                                 {
423                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
424                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
425
426                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
427                                                 }
428                                         }
429                                 }
430
                                    // Vertex shader constants: same dirty-range copy as for the pixel shader.
431                                 if(draw->vsDirtyConstF)
432                                 {
433                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
434                                         draw->vsDirtyConstF = 0;
435                                 }
436
437                                 if(draw->vsDirtyConstI)
438                                 {
439                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
440                                         draw->vsDirtyConstI = 0;
441                                 }
442
443                                 if(draw->vsDirtyConstB)
444                                 {
445                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
446                                         draw->vsDirtyConstB = 0;
447                                 }
448
449                                 if(context->vertexShader->isInstanceIdDeclared())
450                                 {
451                                         data->instanceID = context->instanceID;
452                                 }
453
454                                 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
455                                 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
456                         }
457                         else
458                         {
                                    // No vertex shader: copy `ff` (presumably the fixed-function state — confirm)
                                    // and reset the vertex dirty counts to the values DrawCall's constructor uses.
459                                 data->ff = ff;
460
461                                 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
462                                 draw->vsDirtyConstI = 16;
463                                 draw->vsDirtyConstB = 16;
464
465                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
466                                 {
467                                         draw->vUniformBuffers[i] = nullptr;
468                                 }
469
470                                 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
471                                 {
472                                         draw->transformFeedbackBuffers[i] = nullptr;
473                                 }
474                         }
475
                            // Stencil, fog and point state are copied only when the matching state flag is set.
476                         if(pixelState.stencilActive)
477                         {
478                                 data->stencil[0] = stencil;
479                                 data->stencil[1] = stencilCCW;
480                         }
481
482                         if(pixelState.fogActive)
483                         {
484                                 data->fog = fog;
485                         }
486
487                         if(setupState.isDrawPoint)
488                         {
489                                 data->point = point;
490                         }
491
492                         data->lineWidth = context->lineWidth;
493
494                         data->factor = factor;
495
                            // Alpha-to-coverage: per-sample thresholds spread around the alpha reference
                            // (scaled from 0..255 to 0..1), staying within `margin` of it. Only 2 and 4
                            // samples are handled; any other count asserts.
496                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
497                         {
498                                 float ref = context->alphaReference * (1.0f / 255.0f);
499                                 float margin = sw::min(ref, 1.0f - ref);
500
501                                 if(ms == 4)
502                                 {
503                                         data->a2c0 = replicate(ref - margin * 0.6f);
504                                         data->a2c1 = replicate(ref - margin * 0.2f);
505                                         data->a2c2 = replicate(ref + margin * 0.2f);
506                                         data->a2c3 = replicate(ref + margin * 0.6f);
507                                 }
508                                 else if(ms == 2)
509                                 {
510                                         data->a2c0 = replicate(ref - margin * 0.3f);
511                                         data->a2c1 = replicate(ref + margin * 0.3f);
512                                 }
513                                 else ASSERT(false);
514                         }
515
                            // Zero the per-cluster occlusion counters when occlusion is enabled.
516                         if(pixelState.occlusionEnabled)
517                         {
518                                 for(int cluster = 0; cluster < clusterCount; cluster++)
519                                 {
520                                         data->occlusion[cluster] = 0;
521                                 }
522                         }
523
524                         #if PERF_PROFILE
525                                 for(int cluster = 0; cluster < clusterCount; cluster++)
526                                 {
527                                         for(int i = 0; i < PERF_TIMERS; i++)
528                                         {
529                                                 data->cycles[i][cluster] = 0;
530                                         }
531                                 }
532                         #endif
533
534                         // Viewport
535                         {
                                    // W/H are half the viewport extents, (X0, Y0) its center, and Z the depth
                                    // range between the near (N) and far (F) values.
536                                 float W = 0.5f * viewport.width;
537                                 float H = 0.5f * viewport.height;
538                                 float X0 = viewport.x0 + W;
539                                 float Y0 = viewport.y0 + H;
540                                 float N = viewport.minZ;
541                                 float F = viewport.maxZ;
542                                 float Z = F - N;
543
                                    // The depth bias is folded into the near value only when isDrawTriangle(false) holds.
544                                 if(context->isDrawTriangle(false))
545                                 {
546                                         N += depthBias;
547                                 }
548
                                    // Complementary depth: negate the range and use 1 - near.
549                                 if(complementaryDepthBuffer)
550                                 {
551                                         Z = -Z;
552                                         N = 1 - N;
553                                 }
554
                                    // Offset tables: the row is chosen by log2 of the super-sample count, the
                                    // column by the pass index q.
555                                 static const float X[5][16] =   // Fragment offsets
556                                 {
557                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
558                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
559                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
560                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
561                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
562                                 };
563
564                                 static const float Y[5][16] =   // Fragment offsets
565                                 {
566                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
567                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
568                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
569                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
570                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
571                                 };
572
573                                 int s = sw::log2(ss);
574
                                    // Viewport terms are stored scaled by 16, and the per-pass fragment offset
                                    // is stored divided by the half extents.
575                                 data->Wx16 = replicate(W * 16);
576                                 data->Hx16 = replicate(H * 16);
577                                 data->X0x16 = replicate(X0 * 16 - 8);
578                                 data->Y0x16 = replicate(Y0 * 16 - 8);
579                                 data->XXXX = replicate(X[s][q] / W);
580                                 data->YYYY = replicate(Y[s][q] / H);
581                                 data->halfPixelX = replicate(0.5f / W);
582                                 data->halfPixelY = replicate(0.5f / H);
583                                 data->viewportHeight = abs(viewport.height);
584                                 data->slopeDepthBias = slopeDepthBias;
585                                 data->depthRange = Z;
586                                 data->depthNear = N;
587                                 draw->clipFlags = clipFlags;
588
                                    // Copy only the user clip planes whose flag is set.
589                                 if(clipFlags)
590                                 {
591                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
592                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
593                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
594                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
595                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
596                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
597                                 }
598                         }
599
600                         // Target
601                         {
                                    // Lock every bound color, depth and stencil buffer and record its pitch and
                                    // slice size. NOTE(review): q * ms is passed as the third lock argument,
                                    // presumably selecting the sample slice for this pass — confirm against Surface.
602                                 for(int index = 0; index < RENDERTARGETS; index++)
603                                 {
604                                         draw->renderTarget[index] = context->renderTarget[index];
605
606                                         if(draw->renderTarget[index])
607                                         {
608                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
609                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
610                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
611                                         }
612                                 }
613
614                                 draw->depthBuffer = context->depthBuffer;
615                                 draw->stencilBuffer = context->stencilBuffer;
616
617                                 if(draw->depthBuffer)
618                                 {
619                                         data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
620                                         data->depthPitchB = context->depthBuffer->getInternalPitchB();
621                                         data->depthSliceB = context->depthBuffer->getInternalSliceB();
622                                 }
623
624                                 if(draw->stencilBuffer)
625                                 {
626                                         data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, q * ms, MANAGED);
627                                         data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
628                                         data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
629                                 }
630                         }
631
632                         // Scissor
633                         {
634                                 data->scissorX0 = scissor.x0;
635                                 data->scissorX1 = scissor.x1;
636                                 data->scissorY0 = scissor.y0;
637                                 data->scissorY1 = scissor.y1;
638                         }
639
640                         draw->primitive = 0;
641                         draw->count = count;
642
                            // One reference per batch, rounding up. Assigning a value other than -1 also
                            // takes the draw call out of the free pool scanned above.
643                         draw->references = (count + batch - 1) / batch;
644
                            // Publish the draw: nextDraw is advanced under the scheduler mutex.
645                         schedulerMutex.lock();
646                         nextDraw++;
647                         schedulerMutex.unlock();
648
                            // In debug builds with a single thread the calling thread runs the task loop
                            // itself. Otherwise, if no thread is awake, wait on suspend[0], post a RESUME
                            // task for thread 0 and signal resume[0].
649                         #ifndef NDEBUG
650                         if(threadCount == 1)   // Use main thread for draw execution
651                         {
652                                 threadsAwake = 1;
653                                 task[0].type = Task::RESUME;
654
655                                 taskLoop(0);
656                         }
657                         else
658                         #endif
659                         {
660                                 if(!threadsAwake)
661                                 {
662                                         suspend[0]->wait();
663
664                                         threadsAwake = 1;
665                                         task[0].type = Task::RESUME;
666
667                                         resume[0]->signal();
668                                 }
669                         }
670                 }
671         }
672
673         void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
674         {
675                 blitter->clear(pixel, format, dest, dRect, rgbaMask);
676         }
677
678         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
679         {
680                 blitter->blit(source, sRect, dest, dRect, filter, isStencil);
681         }
682
683         void Renderer::blit3D(Surface *source, Surface *dest)
684         {
685                 blitter->blit3D(source, dest);
686         }
687
688         void Renderer::threadFunction(void *parameters)
689         {
690                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
691                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
692
693                 if(logPrecision < IEEE)
694                 {
695                         CPUID::setFlushToZero(true);
696                         CPUID::setDenormalsAreZero(true);
697                 }
698
699                 renderer->threadLoop(threadIndex);
700         }
701
702         void Renderer::threadLoop(int threadIndex)
703         {
704                 while(!exitThreads)
705                 {
706                         taskLoop(threadIndex);
707
708                         suspend[threadIndex]->signal();
709                         resume[threadIndex]->wait();
710                 }
711         }
712
713         void Renderer::taskLoop(int threadIndex)
714         {
715                 while(task[threadIndex].type != Task::SUSPEND)
716                 {
717                         scheduleTask(threadIndex);
718                         executeTask(threadIndex);
719                 }
720         }
721
722         void Renderer::findAvailableTasks()
723         {
724                 // Find pixel tasks
725                 for(int cluster = 0; cluster < clusterCount; cluster++)
726                 {
727                         if(!pixelProgress[cluster].executing)
728                         {
729                                 for(int unit = 0; unit < unitCount; unit++)
730                                 {
731                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
732                                         {
733                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
734                                                 {
735                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
736                                                         {
737                                                                 Task &task = taskQueue[qHead];
738                                                                 task.type = Task::PIXELS;
739                                                                 task.primitiveUnit = unit;
740                                                                 task.pixelCluster = cluster;
741
742                                                                 pixelProgress[cluster].executing = true;
743
744                                                                 // Commit to the task queue
745                                                                 qHead = (qHead + 1) % 32;
746                                                                 qSize++;
747
748                                                                 break;
749                                                         }
750                                                 }
751                                         }
752                                 }
753                         }
754                 }
755
756                 // Find primitive tasks
757                 if(currentDraw == nextDraw)
758                 {
759                         return;   // No more primitives to process
760                 }
761
762                 for(int unit = 0; unit < unitCount; unit++)
763                 {
764                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
765
766                         if(draw->primitive >= draw->count)
767                         {
768                                 currentDraw++;
769
770                                 if(currentDraw == nextDraw)
771                                 {
772                                         return;   // No more primitives to process
773                                 }
774
775                                 draw = drawList[currentDraw % DRAW_COUNT];
776                         }
777
778                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
779                         {
780                                 int primitive = draw->primitive;
781                                 int count = draw->count;
782                                 int batch = draw->batchSize;
783
784                                 primitiveProgress[unit].drawCall = currentDraw;
785                                 primitiveProgress[unit].firstPrimitive = primitive;
786                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
787
788                                 draw->primitive += batch;
789
790                                 Task &task = taskQueue[qHead];
791                                 task.type = Task::PRIMITIVES;
792                                 task.primitiveUnit = unit;
793
794                                 primitiveProgress[unit].references = -1;
795
796                                 // Commit to the task queue
797                                 qHead = (qHead + 1) % 32;
798                                 qSize++;
799                         }
800                 }
801         }
802
803         void Renderer::scheduleTask(int threadIndex)
804         {
805                 schedulerMutex.lock();
806
807                 if((int)qSize < threadCount - threadsAwake + 1)
808                 {
809                         findAvailableTasks();
810                 }
811
812                 if(qSize != 0)
813                 {
814                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
815                         qSize--;
816
817                         if(threadsAwake != threadCount)
818                         {
819                                 int wakeup = qSize - threadsAwake + 1;
820
821                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
822                                 {
823                                         if(task[i].type == Task::SUSPEND)
824                                         {
825                                                 suspend[i]->wait();
826                                                 task[i].type = Task::RESUME;
827                                                 resume[i]->signal();
828
829                                                 threadsAwake++;
830                                                 wakeup--;
831                                         }
832                                 }
833                         }
834                 }
835                 else
836                 {
837                         task[threadIndex].type = Task::SUSPEND;
838
839                         threadsAwake--;
840                 }
841
842                 schedulerMutex.unlock();
843         }
844
845         void Renderer::executeTask(int threadIndex)
846         {
847                 #if PERF_HUD
848                         int64_t startTick = Timer::ticks();
849                 #endif
850
851                 switch(task[threadIndex].type)
852                 {
853                 case Task::PRIMITIVES:
854                         {
855                                 int unit = task[threadIndex].primitiveUnit;
856
857                                 int input = primitiveProgress[unit].firstPrimitive;
858                                 int count = primitiveProgress[unit].primitiveCount;
859                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
860                                 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
861
862                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
863
864                                 #if PERF_HUD
865                                         int64_t time = Timer::ticks();
866                                         vertexTime[threadIndex] += time - startTick;
867                                         startTick = time;
868                                 #endif
869
870                                 int visible = 0;
871
872                                 if(!draw->setupState.rasterizerDiscard)
873                                 {
874                                         visible = (this->*setupPrimitives)(unit, count);
875                                 }
876
877                                 primitiveProgress[unit].visible = visible;
878                                 primitiveProgress[unit].references = clusterCount;
879
880                                 #if PERF_HUD
881                                         setupTime[threadIndex] += Timer::ticks() - startTick;
882                                 #endif
883                         }
884                         break;
885                 case Task::PIXELS:
886                         {
887                                 int unit = task[threadIndex].primitiveUnit;
888                                 int visible = primitiveProgress[unit].visible;
889
890                                 if(visible > 0)
891                                 {
892                                         int cluster = task[threadIndex].pixelCluster;
893                                         Primitive *primitive = primitiveBatch[unit];
894                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
895                                         DrawData *data = draw->data;
896                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
897
898                                         pixelRoutine(primitive, visible, cluster, data);
899                                 }
900
901                                 finishRendering(task[threadIndex]);
902
903                                 #if PERF_HUD
904                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
905                                 #endif
906                         }
907                         break;
908                 case Task::RESUME:
909                         break;
910                 case Task::SUSPEND:
911                         break;
912                 default:
913                         ASSERT(false);
914                 }
915         }
916
917         void Renderer::synchronize()
918         {
919                 sync->lock(sw::PUBLIC);
920                 sync->unlock();
921         }
922
923         void Renderer::finishRendering(Task &pixelTask)
924         {
925                 int unit = pixelTask.primitiveUnit;
926                 int cluster = pixelTask.pixelCluster;
927
928                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
929                 DrawData &data = *draw.data;
930                 int primitive = primitiveProgress[unit].firstPrimitive;
931                 int count = primitiveProgress[unit].primitiveCount;
932                 int processedPrimitives = primitive + count;
933
934                 pixelProgress[cluster].processedPrimitives = processedPrimitives;
935
936                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
937                 {
938                         pixelProgress[cluster].drawCall++;
939                         pixelProgress[cluster].processedPrimitives = 0;
940                 }
941
942                 int ref = atomicDecrement(&primitiveProgress[unit].references);
943
944                 if(ref == 0)
945                 {
946                         ref = atomicDecrement(&draw.references);
947
948                         if(ref == 0)
949                         {
950                                 #if PERF_PROFILE
951                                         for(int cluster = 0; cluster < clusterCount; cluster++)
952                                         {
953                                                 for(int i = 0; i < PERF_TIMERS; i++)
954                                                 {
955                                                         profiler.cycles[i] += data.cycles[i][cluster];
956                                                 }
957                                         }
958                                 #endif
959
960                                 if(draw.queries)
961                                 {
962                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
963                                         {
964                                                 Query *query = *q;
965
966                                                 switch(query->type)
967                                                 {
968                                                 case Query::FRAGMENTS_PASSED:
969                                                         for(int cluster = 0; cluster < clusterCount; cluster++)
970                                                         {
971                                                                 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
972                                                         }
973                                                         break;
974                                                 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
975                                                         atomicAdd((volatile int*)&query->data, processedPrimitives);
976                                                         break;
977                                                 default:
978                                                         break;
979                                                 }
980
981                                                 atomicDecrement(&query->reference);
982                                         }
983
984                                         delete draw.queries;
985                                         draw.queries = 0;
986                                 }
987
988                                 for(int i = 0; i < RENDERTARGETS; i++)
989                                 {
990                                         if(draw.renderTarget[i])
991                                         {
992                                                 draw.renderTarget[i]->unlockInternal();
993                                         }
994                                 }
995
996                                 if(draw.depthBuffer)
997                                 {
998                                         draw.depthBuffer->unlockInternal();
999                                 }
1000
1001                                 if(draw.stencilBuffer)
1002                                 {
1003                                         draw.stencilBuffer->unlockStencil();
1004                                 }
1005
1006                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
1007                                 {
1008                                         if(draw.texture[i])
1009                                         {
1010                                                 draw.texture[i]->unlock();
1011                                         }
1012                                 }
1013
1014                                 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
1015                                 {
1016                                         if(draw.vertexStream[i])
1017                                         {
1018                                                 draw.vertexStream[i]->unlock();
1019                                         }
1020                                 }
1021
1022                                 if(draw.indexBuffer)
1023                                 {
1024                                         draw.indexBuffer->unlock();
1025                                 }
1026
1027                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1028                                 {
1029                                         if(draw.pUniformBuffers[i])
1030                                         {
1031                                                 draw.pUniformBuffers[i]->unlock();
1032                                         }
1033                                         if(draw.vUniformBuffers[i])
1034                                         {
1035                                                 draw.vUniformBuffers[i]->unlock();
1036                                         }
1037                                 }
1038
1039                                 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1040                                 {
1041                                         if(draw.transformFeedbackBuffers[i])
1042                                         {
1043                                                 draw.transformFeedbackBuffers[i]->unlock();
1044                                         }
1045                                 }
1046
1047                                 draw.vertexRoutine->unbind();
1048                                 draw.setupRoutine->unbind();
1049                                 draw.pixelRoutine->unbind();
1050
1051                                 sync->unlock();
1052
1053                                 draw.references = -1;
1054                                 resumeApp->signal();
1055                         }
1056                 }
1057
1058                 pixelProgress[cluster].executing = false;
1059         }
1060
1061         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1062         {
1063                 Triangle *triangle = triangleBatch[unit];
1064                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1065                 DrawData *data = draw->data;
1066                 VertexTask *task = vertexTask[thread];
1067
1068                 const void *indices = data->indices;
1069                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1070
1071                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1072                 {
1073                         task->vertexCache.clear();
1074                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1075                 }
1076
1077                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1078
1079                 switch(draw->drawType)
1080                 {
1081                 case DRAW_POINTLIST:
1082                         {
1083                                 unsigned int index = start;
1084
1085                                 for(unsigned int i = 0; i < triangleCount; i++)
1086                                 {
1087                                         batch[i][0] = index;
1088                                         batch[i][1] = index;
1089                                         batch[i][2] = index;
1090
1091                                         index += 1;
1092                                 }
1093                         }
1094                         break;
1095                 case DRAW_LINELIST:
1096                         {
1097                                 unsigned int index = 2 * start;
1098
1099                                 for(unsigned int i = 0; i < triangleCount; i++)
1100                                 {
1101                                         batch[i][0] = index + 0;
1102                                         batch[i][1] = index + 1;
1103                                         batch[i][2] = index + 1;
1104
1105                                         index += 2;
1106                                 }
1107                         }
1108                         break;
1109                 case DRAW_LINESTRIP:
1110                         {
1111                                 unsigned int index = start;
1112
1113                                 for(unsigned int i = 0; i < triangleCount; i++)
1114                                 {
1115                                         batch[i][0] = index + 0;
1116                                         batch[i][1] = index + 1;
1117                                         batch[i][2] = index + 1;
1118
1119                                         index += 1;
1120                                 }
1121                         }
1122                         break;
1123                 case DRAW_LINELOOP:
1124                         {
1125                                 unsigned int index = start;
1126
1127                                 for(unsigned int i = 0; i < triangleCount; i++)
1128                                 {
1129                                         batch[i][0] = (index + 0) % loop;
1130                                         batch[i][1] = (index + 1) % loop;
1131                                         batch[i][2] = (index + 1) % loop;
1132
1133                                         index += 1;
1134                                 }
1135                         }
1136                         break;
1137                 case DRAW_TRIANGLELIST:
1138                         {
1139                                 unsigned int index = 3 * start;
1140
1141                                 for(unsigned int i = 0; i < triangleCount; i++)
1142                                 {
1143                                         batch[i][0] = index + 0;
1144                                         batch[i][1] = index + 1;
1145                                         batch[i][2] = index + 2;
1146
1147                                         index += 3;
1148                                 }
1149                         }
1150                         break;
1151                 case DRAW_TRIANGLESTRIP:
1152                         {
1153                                 unsigned int index = start;
1154
1155                                 for(unsigned int i = 0; i < triangleCount; i++)
1156                                 {
1157                                         batch[i][0] = index + 0;
1158                                         batch[i][1] = index + (index & 1) + 1;
1159                                         batch[i][2] = index + (~index & 1) + 1;
1160
1161                                         index += 1;
1162                                 }
1163                         }
1164                         break;
1165                 case DRAW_TRIANGLEFAN:
1166                         {
1167                                 unsigned int index = start;
1168
1169                                 for(unsigned int i = 0; i < triangleCount; i++)
1170                                 {
1171                                         batch[i][0] = index + 1;
1172                                         batch[i][1] = index + 2;
1173                                         batch[i][2] = 0;
1174
1175                                         index += 1;
1176                                 }
1177                         }
1178                         break;
1179                 case DRAW_INDEXEDPOINTLIST8:
1180                         {
1181                                 const unsigned char *index = (const unsigned char*)indices + start;
1182
1183                                 for(unsigned int i = 0; i < triangleCount; i++)
1184                                 {
1185                                         batch[i][0] = *index;
1186                                         batch[i][1] = *index;
1187                                         batch[i][2] = *index;
1188
1189                                         index += 1;
1190                                 }
1191                         }
1192                         break;
1193                 case DRAW_INDEXEDPOINTLIST16:
1194                         {
1195                                 const unsigned short *index = (const unsigned short*)indices + start;
1196
1197                                 for(unsigned int i = 0; i < triangleCount; i++)
1198                                 {
1199                                         batch[i][0] = *index;
1200                                         batch[i][1] = *index;
1201                                         batch[i][2] = *index;
1202
1203                                         index += 1;
1204                                 }
1205                         }
1206                         break;
1207                 case DRAW_INDEXEDPOINTLIST32:
1208                         {
1209                                 const unsigned int *index = (const unsigned int*)indices + start;
1210
1211                                 for(unsigned int i = 0; i < triangleCount; i++)
1212                                 {
1213                                         batch[i][0] = *index;
1214                                         batch[i][1] = *index;
1215                                         batch[i][2] = *index;
1216
1217                                         index += 1;
1218                                 }
1219                         }
1220                         break;
1221                 case DRAW_INDEXEDLINELIST8:
1222                         {
1223                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1224
1225                                 for(unsigned int i = 0; i < triangleCount; i++)
1226                                 {
1227                                         batch[i][0] = index[0];
1228                                         batch[i][1] = index[1];
1229                                         batch[i][2] = index[1];
1230
1231                                         index += 2;
1232                                 }
1233                         }
1234                         break;
1235                 case DRAW_INDEXEDLINELIST16:
1236                         {
1237                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1238
1239                                 for(unsigned int i = 0; i < triangleCount; i++)
1240                                 {
1241                                         batch[i][0] = index[0];
1242                                         batch[i][1] = index[1];
1243                                         batch[i][2] = index[1];
1244
1245                                         index += 2;
1246                                 }
1247                         }
1248                         break;
1249                 case DRAW_INDEXEDLINELIST32:
1250                         {
1251                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1252
1253                                 for(unsigned int i = 0; i < triangleCount; i++)
1254                                 {
1255                                         batch[i][0] = index[0];
1256                                         batch[i][1] = index[1];
1257                                         batch[i][2] = index[1];
1258
1259                                         index += 2;
1260                                 }
1261                         }
1262                         break;
1263                 case DRAW_INDEXEDLINESTRIP8:
1264                         {
1265                                 const unsigned char *index = (const unsigned char*)indices + start;
1266
1267                                 for(unsigned int i = 0; i < triangleCount; i++)
1268                                 {
1269                                         batch[i][0] = index[0];
1270                                         batch[i][1] = index[1];
1271                                         batch[i][2] = index[1];
1272
1273                                         index += 1;
1274                                 }
1275                         }
1276                         break;
1277                 case DRAW_INDEXEDLINESTRIP16:
1278                         {
1279                                 const unsigned short *index = (const unsigned short*)indices + start;
1280
1281                                 for(unsigned int i = 0; i < triangleCount; i++)
1282                                 {
1283                                         batch[i][0] = index[0];
1284                                         batch[i][1] = index[1];
1285                                         batch[i][2] = index[1];
1286
1287                                         index += 1;
1288                                 }
1289                         }
1290                         break;
1291                 case DRAW_INDEXEDLINESTRIP32:
1292                         {
1293                                 const unsigned int *index = (const unsigned int*)indices + start;
1294
1295                                 for(unsigned int i = 0; i < triangleCount; i++)
1296                                 {
1297                                         batch[i][0] = index[0];
1298                                         batch[i][1] = index[1];
1299                                         batch[i][2] = index[1];
1300
1301                                         index += 1;
1302                                 }
1303                         }
1304                         break;
1305                 case DRAW_INDEXEDLINELOOP8:
1306                         {
1307                                 const unsigned char *index = (const unsigned char*)indices;
1308
1309                                 for(unsigned int i = 0; i < triangleCount; i++)
1310                                 {
1311                                         batch[i][0] = index[(start + i + 0) % loop];
1312                                         batch[i][1] = index[(start + i + 1) % loop];
1313                                         batch[i][2] = index[(start + i + 1) % loop];
1314                                 }
1315                         }
1316                         break;
1317                 case DRAW_INDEXEDLINELOOP16:
1318                         {
1319                                 const unsigned short *index = (const unsigned short*)indices;
1320
1321                                 for(unsigned int i = 0; i < triangleCount; i++)
1322                                 {
1323                                         batch[i][0] = index[(start + i + 0) % loop];
1324                                         batch[i][1] = index[(start + i + 1) % loop];
1325                                         batch[i][2] = index[(start + i + 1) % loop];
1326                                 }
1327                         }
1328                         break;
1329                 case DRAW_INDEXEDLINELOOP32:
1330                         {
1331                                 const unsigned int *index = (const unsigned int*)indices;
1332
1333                                 for(unsigned int i = 0; i < triangleCount; i++)
1334                                 {
1335                                         batch[i][0] = index[(start + i + 0) % loop];
1336                                         batch[i][1] = index[(start + i + 1) % loop];
1337                                         batch[i][2] = index[(start + i + 1) % loop];
1338                                 }
1339                         }
1340                         break;
1341                 case DRAW_INDEXEDTRIANGLELIST8:
1342                         {
1343                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1344
1345                                 for(unsigned int i = 0; i < triangleCount; i++)
1346                                 {
1347                                         batch[i][0] = index[0];
1348                                         batch[i][1] = index[1];
1349                                         batch[i][2] = index[2];
1350
1351                                         index += 3;
1352                                 }
1353                         }
1354                         break;
1355                 case DRAW_INDEXEDTRIANGLELIST16:
1356                         {
1357                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1358
1359                                 for(unsigned int i = 0; i < triangleCount; i++)
1360                                 {
1361                                         batch[i][0] = index[0];
1362                                         batch[i][1] = index[1];
1363                                         batch[i][2] = index[2];
1364
1365                                         index += 3;
1366                                 }
1367                         }
1368                         break;
1369                 case DRAW_INDEXEDTRIANGLELIST32:
1370                         {
1371                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1372
1373                                 for(unsigned int i = 0; i < triangleCount; i++)
1374                                 {
1375                                         batch[i][0] = index[0];
1376                                         batch[i][1] = index[1];
1377                                         batch[i][2] = index[2];
1378
1379                                         index += 3;
1380                                 }
1381                         }
1382                         break;
1383                 case DRAW_INDEXEDTRIANGLESTRIP8:
1384                         {
1385                                 const unsigned char *index = (const unsigned char*)indices + start;
1386
1387                                 for(unsigned int i = 0; i < triangleCount; i++)
1388                                 {
1389                                         batch[i][0] = index[0];
1390                                         batch[i][1] = index[((start + i) & 1) + 1];
1391                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1392
1393                                         index += 1;
1394                                 }
1395                         }
1396                         break;
1397                 case DRAW_INDEXEDTRIANGLESTRIP16:
1398                         {
1399                                 const unsigned short *index = (const unsigned short*)indices + start;
1400
1401                                 for(unsigned int i = 0; i < triangleCount; i++)
1402                                 {
1403                                         batch[i][0] = index[0];
1404                                         batch[i][1] = index[((start + i) & 1) + 1];
1405                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1406
1407                                         index += 1;
1408                                 }
1409                         }
1410                         break;
1411                 case DRAW_INDEXEDTRIANGLESTRIP32:
1412                         {
1413                                 const unsigned int *index = (const unsigned int*)indices + start;
1414
1415                                 for(unsigned int i = 0; i < triangleCount; i++)
1416                                 {
1417                                         batch[i][0] = index[0];
1418                                         batch[i][1] = index[((start + i) & 1) + 1];
1419                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1420
1421                                         index += 1;
1422                                 }
1423                         }
1424                         break;
1425                 case DRAW_INDEXEDTRIANGLEFAN8:
1426                         {
1427                                 const unsigned char *index = (const unsigned char*)indices;
1428
1429                                 for(unsigned int i = 0; i < triangleCount; i++)
1430                                 {
1431                                         batch[i][0] = index[start + i + 1];
1432                                         batch[i][1] = index[start + i + 2];
1433                                         batch[i][2] = index[0];
1434                                 }
1435                         }
1436                         break;
1437                 case DRAW_INDEXEDTRIANGLEFAN16:
1438                         {
1439                                 const unsigned short *index = (const unsigned short*)indices;
1440
1441                                 for(unsigned int i = 0; i < triangleCount; i++)
1442                                 {
1443                                         batch[i][0] = index[start + i + 1];
1444                                         batch[i][1] = index[start + i + 2];
1445                                         batch[i][2] = index[0];
1446                                 }
1447                         }
1448                         break;
1449                 case DRAW_INDEXEDTRIANGLEFAN32:
1450                         {
1451                                 const unsigned int *index = (const unsigned int*)indices;
1452
1453                                 for(unsigned int i = 0; i < triangleCount; i++)
1454                                 {
1455                                         batch[i][0] = index[start + i + 1];
1456                                         batch[i][1] = index[start + i + 2];
1457                                         batch[i][2] = index[0];
1458                                 }
1459                         }
1460                         break;
1461                 case DRAW_QUADLIST:
1462                         {
1463                                 unsigned int index = 4 * start / 2;
1464
1465                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1466                                 {
1467                                         batch[i+0][0] = index + 0;
1468                                         batch[i+0][1] = index + 1;
1469                                         batch[i+0][2] = index + 2;
1470
1471                                         batch[i+1][0] = index + 0;
1472                                         batch[i+1][1] = index + 2;
1473                                         batch[i+1][2] = index + 3;
1474
1475                                         index += 4;
1476                                 }
1477                         }
1478                         break;
1479                 default:
1480                         ASSERT(false);
1481                         return;
1482                 }
1483
1484                 task->primitiveStart = start;
1485                 task->vertexCount = triangleCount * 3;
1486                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1487         }
1488
1489         int Renderer::setupSolidTriangles(int unit, int count)
1490         {
1491                 Triangle *triangle = triangleBatch[unit];
1492                 Primitive *primitive = primitiveBatch[unit];
1493
1494                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1495                 SetupProcessor::State &state = draw.setupState;
1496                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1497
1498                 int ms = state.multiSample;
1499                 int pos = state.positionRegister;
1500                 const DrawData *data = draw.data;
1501                 int visible = 0;
1502
1503                 for(int i = 0; i < count; i++, triangle++)
1504                 {
1505                         Vertex &v0 = triangle->v0;
1506                         Vertex &v1 = triangle->v1;
1507                         Vertex &v2 = triangle->v2;
1508
1509                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1510                         {
1511                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1512
1513                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1514
1515                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1516                                 {
1517                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1518                                         {
1519                                                 continue;
1520                                         }
1521                                 }
1522
1523                                 if(setupRoutine(primitive, triangle, &polygon, data))
1524                                 {
1525                                         primitive += ms;
1526                                         visible++;
1527                                 }
1528                         }
1529                 }
1530
1531                 return visible;
1532         }
1533
1534         int Renderer::setupWireframeTriangle(int unit, int count)
1535         {
1536                 Triangle *triangle = triangleBatch[unit];
1537                 Primitive *primitive = primitiveBatch[unit];
1538                 int visible = 0;
1539
1540                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1541                 SetupProcessor::State &state = draw.setupState;
1542
1543                 const Vertex &v0 = triangle[0].v0;
1544                 const Vertex &v1 = triangle[0].v1;
1545                 const Vertex &v2 = triangle[0].v2;
1546
1547                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1548
1549                 if(state.cullMode == CULL_CLOCKWISE)
1550                 {
1551                         if(d >= 0) return 0;
1552                 }
1553                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1554                 {
1555                         if(d <= 0) return 0;
1556                 }
1557
1558                 // Copy attributes
1559                 triangle[1].v0 = v1;
1560                 triangle[1].v1 = v2;
1561                 triangle[2].v0 = v2;
1562                 triangle[2].v1 = v0;
1563
1564                 if(state.color[0][0].flat)   // FIXME
1565                 {
1566                         for(int i = 0; i < 2; i++)
1567                         {
1568                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1569                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1570                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1571                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1572                         }
1573                 }
1574
1575                 for(int i = 0; i < 3; i++)
1576                 {
1577                         if(setupLine(*primitive, *triangle, draw))
1578                         {
1579                                 primitive->area = 0.5f * d;
1580
1581                                 primitive++;
1582                                 visible++;
1583                         }
1584
1585                         triangle++;
1586                 }
1587
1588                 return visible;
1589         }
1590
1591         int Renderer::setupVertexTriangle(int unit, int count)
1592         {
1593                 Triangle *triangle = triangleBatch[unit];
1594                 Primitive *primitive = primitiveBatch[unit];
1595                 int visible = 0;
1596
1597                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1598                 SetupProcessor::State &state = draw.setupState;
1599
1600                 const Vertex &v0 = triangle[0].v0;
1601                 const Vertex &v1 = triangle[0].v1;
1602                 const Vertex &v2 = triangle[0].v2;
1603
1604                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1605
1606                 if(state.cullMode == CULL_CLOCKWISE)
1607                 {
1608                         if(d >= 0) return 0;
1609                 }
1610                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1611                 {
1612                         if(d <= 0) return 0;
1613                 }
1614
1615                 // Copy attributes
1616                 triangle[1].v0 = v1;
1617                 triangle[2].v0 = v2;
1618
1619                 for(int i = 0; i < 3; i++)
1620                 {
1621                         if(setupPoint(*primitive, *triangle, draw))
1622                         {
1623                                 primitive->area = 0.5f * d;
1624
1625                                 primitive++;
1626                                 visible++;
1627                         }
1628
1629                         triangle++;
1630                 }
1631
1632                 return visible;
1633         }
1634
1635         int Renderer::setupLines(int unit, int count)
1636         {
1637                 Triangle *triangle = triangleBatch[unit];
1638                 Primitive *primitive = primitiveBatch[unit];
1639                 int visible = 0;
1640
1641                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1642                 SetupProcessor::State &state = draw.setupState;
1643
1644                 int ms = state.multiSample;
1645
1646                 for(int i = 0; i < count; i++)
1647                 {
1648                         if(setupLine(*primitive, *triangle, draw))
1649                         {
1650                                 primitive += ms;
1651                                 visible++;
1652                         }
1653
1654                         triangle++;
1655                 }
1656
1657                 return visible;
1658         }
1659
1660         int Renderer::setupPoints(int unit, int count)
1661         {
1662                 Triangle *triangle = triangleBatch[unit];
1663                 Primitive *primitive = primitiveBatch[unit];
1664                 int visible = 0;
1665
1666                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1667                 SetupProcessor::State &state = draw.setupState;
1668
1669                 int ms = state.multiSample;
1670
1671                 for(int i = 0; i < count; i++)
1672                 {
1673                         if(setupPoint(*primitive, *triangle, draw))
1674                         {
1675                                 primitive += ms;
1676                                 visible++;
1677                         }
1678
1679                         triangle++;
1680                 }
1681
1682                 return visible;
1683         }
1684
1685         bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1686         {
1687                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1688                 const SetupProcessor::State &state = draw.setupState;
1689                 const DrawData &data = *draw.data;
1690
1691                 float lineWidth = data.lineWidth;
1692
1693                 Vertex &v0 = triangle.v0;
1694                 Vertex &v1 = triangle.v1;
1695
1696                 int pos = state.positionRegister;
1697
1698                 const float4 &P0 = v0.v[pos];
1699                 const float4 &P1 = v1.v[pos];
1700
1701                 if(P0.w <= 0 && P1.w <= 0)
1702                 {
1703                         return false;
1704                 }
1705
1706                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1707                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1708
1709                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1710                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1711
1712                 if(dx == 0 && dy == 0)
1713                 {
1714                         return false;
1715                 }
1716
1717                 if(false)   // Rectangle
1718                 {
1719                         float4 P[4];
1720                         int C[4];
1721
1722                         P[0] = P0;
1723                         P[1] = P1;
1724                         P[2] = P1;
1725                         P[3] = P0;
1726
1727                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1728
1729                         dx *= scale;
1730                         dy *= scale;
1731
1732                         float dx0w = dx * P0.w / W;
1733                         float dy0h = dy * P0.w / H;
1734                         float dx0h = dx * P0.w / H;
1735                         float dy0w = dy * P0.w / W;
1736
1737                         float dx1w = dx * P1.w / W;
1738                         float dy1h = dy * P1.w / H;
1739                         float dx1h = dx * P1.w / H;
1740                         float dy1w = dy * P1.w / W;
1741
1742                         P[0].x += -dy0w + -dx0w;
1743                         P[0].y += -dx0h + +dy0h;
1744                         C[0] = clipper->computeClipFlags(P[0]);
1745
1746                         P[1].x += -dy1w + +dx1w;
1747                         P[1].y += -dx1h + +dy1h;
1748                         C[1] = clipper->computeClipFlags(P[1]);
1749
1750                         P[2].x += +dy1w + +dx1w;
1751                         P[2].y += +dx1h + -dy1h;
1752                         C[2] = clipper->computeClipFlags(P[2]);
1753
1754                         P[3].x += +dy0w + -dx0w;
1755                         P[3].y += +dx0h + +dy0h;
1756                         C[3] = clipper->computeClipFlags(P[3]);
1757
1758                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1759                         {
1760                                 Polygon polygon(P, 4);
1761
1762                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1763
1764                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1765                                 {
1766                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1767                                         {
1768                                                 return false;
1769                                         }
1770                                 }
1771
1772                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1773                         }
1774                 }
1775                 else   // Diamond test convention
1776                 {
1777                         float4 P[8];
1778                         int C[8];
1779
1780                         P[0] = P0;
1781                         P[1] = P0;
1782                         P[2] = P0;
1783                         P[3] = P0;
1784                         P[4] = P1;
1785                         P[5] = P1;
1786                         P[6] = P1;
1787                         P[7] = P1;
1788
1789                         float dx0 = lineWidth * 0.5f * P0.w / W;
1790                         float dy0 = lineWidth * 0.5f * P0.w / H;
1791
1792                         float dx1 = lineWidth * 0.5f * P1.w / W;
1793                         float dy1 = lineWidth * 0.5f * P1.w / H;
1794
1795                         P[0].x += -dx0;
1796                         C[0] = clipper->computeClipFlags(P[0]);
1797
1798                         P[1].y += +dy0;
1799                         C[1] = clipper->computeClipFlags(P[1]);
1800
1801                         P[2].x += +dx0;
1802                         C[2] = clipper->computeClipFlags(P[2]);
1803
1804                         P[3].y += -dy0;
1805                         C[3] = clipper->computeClipFlags(P[3]);
1806
1807                         P[4].x += -dx1;
1808                         C[4] = clipper->computeClipFlags(P[4]);
1809
1810                         P[5].y += +dy1;
1811                         C[5] = clipper->computeClipFlags(P[5]);
1812
1813                         P[6].x += +dx1;
1814                         C[6] = clipper->computeClipFlags(P[6]);
1815
1816                         P[7].y += -dy1;
1817                         C[7] = clipper->computeClipFlags(P[7]);
1818
1819                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1820                         {
1821                                 float4 L[6];
1822
1823                                 if(dx > -dy)
1824                                 {
1825                                         if(dx > dy)   // Right
1826                                         {
1827                                                 L[0] = P[0];
1828                                                 L[1] = P[1];
1829                                                 L[2] = P[5];
1830                                                 L[3] = P[6];
1831                                                 L[4] = P[7];
1832                                                 L[5] = P[3];
1833                                         }
1834                                         else   // Down
1835                                         {
1836                                                 L[0] = P[0];
1837                                                 L[1] = P[4];
1838                                                 L[2] = P[5];
1839                                                 L[3] = P[6];
1840                                                 L[4] = P[2];
1841                                                 L[5] = P[3];
1842                                         }
1843                                 }
1844                                 else
1845                                 {
1846                                         if(dx > dy)   // Up
1847                                         {
1848                                                 L[0] = P[0];
1849                                                 L[1] = P[1];
1850                                                 L[2] = P[2];
1851                                                 L[3] = P[6];
1852                                                 L[4] = P[7];
1853                                                 L[5] = P[4];
1854                                         }
1855                                         else   // Left
1856                                         {
1857                                                 L[0] = P[1];
1858                                                 L[1] = P[2];
1859                                                 L[2] = P[3];
1860                                                 L[3] = P[7];
1861                                                 L[4] = P[4];
1862                                                 L[5] = P[5];
1863                                         }
1864                                 }
1865
1866                                 Polygon polygon(L, 6);
1867
1868                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1869
1870                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1871                                 {
1872                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1873                                         {
1874                                                 return false;
1875                                         }
1876                                 }
1877
1878                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1879                         }
1880                 }
1881
1882                 return false;
1883         }
1884
1885         bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1886         {
1887                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1888                 const SetupProcessor::State &state = draw.setupState;
1889                 const DrawData &data = *draw.data;
1890
1891                 Vertex &v = triangle.v0;
1892
1893                 float pSize;
1894
1895                 int pts = state.pointSizeRegister;
1896
1897                 if(state.pointSizeRegister != Unused)
1898                 {
1899                         pSize = v.v[pts].y;
1900                 }
1901                 else
1902                 {
1903                         pSize = data.point.pointSize[0];
1904                 }
1905
1906                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1907
1908                 float4 P[4];
1909                 int C[4];
1910
1911                 int pos = state.positionRegister;
1912
1913                 P[0] = v.v[pos];
1914                 P[1] = v.v[pos];
1915                 P[2] = v.v[pos];
1916                 P[3] = v.v[pos];
1917
1918                 const float X = pSize * P[0].w * data.halfPixelX[0];
1919                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1920
1921                 P[0].x -= X;
1922                 P[0].y += Y;
1923                 C[0] = clipper->computeClipFlags(P[0]);
1924
1925                 P[1].x += X;
1926                 P[1].y += Y;
1927                 C[1] = clipper->computeClipFlags(P[1]);
1928
1929                 P[2].x += X;
1930                 P[2].y -= Y;
1931                 C[2] = clipper->computeClipFlags(P[2]);
1932
1933                 P[3].x -= X;
1934                 P[3].y -= Y;
1935                 C[3] = clipper->computeClipFlags(P[3]);
1936
1937                 triangle.v1 = triangle.v0;
1938                 triangle.v2 = triangle.v0;
1939
1940                 triangle.v1.X += iround(16 * 0.5f * pSize);
1941                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1942
1943                 Polygon polygon(P, 4);
1944
1945                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1946                 {
1947                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1948
1949                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1950                         {
1951                                 if(!clipper->clip(polygon, clipFlagsOr, draw))
1952                                 {
1953                                         return false;
1954                                 }
1955                         }
1956
1957                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1958                 }
1959
1960                 return false;
1961         }
1962
1963         void Renderer::initializeThreads()
1964         {
1965                 unitCount = ceilPow2(threadCount);
1966                 clusterCount = ceilPow2(threadCount);
1967
1968                 for(int i = 0; i < unitCount; i++)
1969                 {
1970                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1971                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1972                 }
1973
1974                 for(int i = 0; i < threadCount; i++)
1975                 {
1976                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1977                         vertexTask[i]->vertexCache.drawCall = -1;
1978
1979                         task[i].type = Task::SUSPEND;
1980
1981                         resume[i] = new Event();
1982                         suspend[i] = new Event();
1983
1984                         Parameters parameters;
1985                         parameters.threadIndex = i;
1986                         parameters.renderer = this;
1987
1988                         exitThreads = false;
1989                         worker[i] = new Thread(threadFunction, &parameters);
1990
1991                         suspend[i]->wait();
1992                         suspend[i]->signal();
1993                 }
1994         }
1995
1996         void Renderer::terminateThreads()
1997         {
1998                 while(threadsAwake != 0)
1999                 {
2000                         Thread::sleep(1);
2001                 }
2002
2003                 for(int thread = 0; thread < threadCount; thread++)
2004                 {
2005                         if(worker[thread])
2006                         {
2007                                 exitThreads = true;
2008                                 resume[thread]->signal();
2009                                 worker[thread]->join();
2010
2011                                 delete worker[thread];
2012                                 worker[thread] = 0;
2013                                 delete resume[thread];
2014                                 resume[thread] = 0;
2015                                 delete suspend[thread];
2016                                 suspend[thread] = 0;
2017                         }
2018
2019                         deallocate(vertexTask[thread]);
2020                         vertexTask[thread] = 0;
2021                 }
2022
2023                 for(int i = 0; i < 16; i++)
2024                 {
2025                         deallocate(triangleBatch[i]);
2026                         triangleBatch[i] = 0;
2027
2028                         deallocate(primitiveBatch[i]);
2029                         primitiveBatch[i] = 0;
2030                 }
2031         }
2032
2033         void Renderer::loadConstants(const VertexShader *vertexShader)
2034         {
2035                 if(!vertexShader) return;
2036
2037                 size_t count = vertexShader->getLength();
2038
2039                 for(size_t i = 0; i < count; i++)
2040                 {
2041                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2042
2043                         if(instruction->opcode == Shader::OPCODE_DEF)
2044                         {
2045                                 int index = instruction->dst.index;
2046                                 float value[4];
2047
2048                                 value[0] = instruction->src[0].value[0];
2049                                 value[1] = instruction->src[0].value[1];
2050                                 value[2] = instruction->src[0].value[2];
2051                                 value[3] = instruction->src[0].value[3];
2052
2053                                 setVertexShaderConstantF(index, value);
2054                         }
2055                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2056                         {
2057                                 int index = instruction->dst.index;
2058                                 int integer[4];
2059
2060                                 integer[0] = instruction->src[0].integer[0];
2061                                 integer[1] = instruction->src[0].integer[1];
2062                                 integer[2] = instruction->src[0].integer[2];
2063                                 integer[3] = instruction->src[0].integer[3];
2064
2065                                 setVertexShaderConstantI(index, integer);
2066                         }
2067                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2068                         {
2069                                 int index = instruction->dst.index;
2070                                 int boolean = instruction->src[0].boolean[0];
2071
2072                                 setVertexShaderConstantB(index, &boolean);
2073                         }
2074                 }
2075         }
2076
2077         void Renderer::loadConstants(const PixelShader *pixelShader)
2078         {
2079                 if(!pixelShader) return;
2080
2081                 size_t count = pixelShader->getLength();
2082
2083                 for(size_t i = 0; i < count; i++)
2084                 {
2085                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2086
2087                         if(instruction->opcode == Shader::OPCODE_DEF)
2088                         {
2089                                 int index = instruction->dst.index;
2090                                 float value[4];
2091
2092                                 value[0] = instruction->src[0].value[0];
2093                                 value[1] = instruction->src[0].value[1];
2094                                 value[2] = instruction->src[0].value[2];
2095                                 value[3] = instruction->src[0].value[3];
2096
2097                                 setPixelShaderConstantF(index, value);
2098                         }
2099                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2100                         {
2101                                 int index = instruction->dst.index;
2102                                 int integer[4];
2103
2104                                 integer[0] = instruction->src[0].integer[0];
2105                                 integer[1] = instruction->src[0].integer[1];
2106                                 integer[2] = instruction->src[0].integer[2];
2107                                 integer[3] = instruction->src[0].integer[3];
2108
2109                                 setPixelShaderConstantI(index, integer);
2110                         }
2111                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2112                         {
2113                                 int index = instruction->dst.index;
2114                                 int boolean = instruction->src[0].boolean[0];
2115
2116                                 setPixelShaderConstantB(index, &boolean);
2117                         }
2118                 }
2119         }
2120
2121         void Renderer::setIndexBuffer(Resource *indexBuffer)
2122         {
2123                 context->indexBuffer = indexBuffer;
2124         }
2125
2126         void Renderer::setMultiSampleMask(unsigned int mask)
2127         {
2128                 context->sampleMask = mask;
2129         }
2130
2131         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2132         {
2133                 sw::transparencyAntialiasing = transparencyAntialiasing;
2134         }
2135
2136         bool Renderer::isReadWriteTexture(int sampler)
2137         {
2138                 for(int index = 0; index < RENDERTARGETS; index++)
2139                 {
2140                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2141                         {
2142                                 return true;
2143                         }
2144                 }
2145
2146                 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2147                 {
2148                         return true;
2149                 }
2150
2151                 return false;
2152         }
2153
2154         void Renderer::updateClipper()
2155         {
2156                 if(updateClipPlanes)
2157                 {
2158                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2159                         {
2160                                 const Matrix &scissorWorld = getViewTransform();
2161
2162                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2163                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2164                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2165                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2166                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2167                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2168                         }
2169                         else   // User plane in clip space
2170                         {
2171                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2172                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2173                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2174                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2175                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2176                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2177                         }
2178
2179                         updateClipPlanes = false;
2180                 }
2181         }
2182
2183         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2184         {
2185                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2186
2187                 context->texture[sampler] = resource;
2188         }
2189
2190         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2191         {
2192                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2193
2194                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2195         }
2196
2197         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2198         {
2199                 if(type == SAMPLER_PIXEL)
2200                 {
2201                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2202                 }
2203                 else
2204                 {
2205                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2206                 }
2207         }
2208
2209         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2210         {
2211                 if(type == SAMPLER_PIXEL)
2212                 {
2213                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2214                 }
2215                 else
2216                 {
2217                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2218                 }
2219         }
2220
2221         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2222         {
2223                 if(type == SAMPLER_PIXEL)
2224                 {
2225                         PixelProcessor::setGatherEnable(sampler, enable);
2226                 }
2227                 else
2228                 {
2229                         VertexProcessor::setGatherEnable(sampler, enable);
2230                 }
2231         }
2232
2233         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2234         {
2235                 if(type == SAMPLER_PIXEL)
2236                 {
2237                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2238                 }
2239                 else
2240                 {
2241                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2242                 }
2243         }
2244
2245         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2246         {
2247                 if(type == SAMPLER_PIXEL)
2248                 {
2249                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2250                 }
2251                 else
2252                 {
2253                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2254                 }
2255         }
2256
2257         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2258         {
2259                 if(type == SAMPLER_PIXEL)
2260                 {
2261                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2262                 }
2263                 else
2264                 {
2265                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2266                 }
2267         }
2268
2269         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2270         {
2271                 if(type == SAMPLER_PIXEL)
2272                 {
2273                         PixelProcessor::setReadSRGB(sampler, sRGB);
2274                 }
2275                 else
2276                 {
2277                         VertexProcessor::setReadSRGB(sampler, sRGB);
2278                 }
2279         }
2280
2281         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2282         {
2283                 if(type == SAMPLER_PIXEL)
2284                 {
2285                         PixelProcessor::setMipmapLOD(sampler, bias);
2286                 }
2287                 else
2288                 {
2289                         VertexProcessor::setMipmapLOD(sampler, bias);
2290                 }
2291         }
2292
2293         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2294         {
2295                 if(type == SAMPLER_PIXEL)
2296                 {
2297                         PixelProcessor::setBorderColor(sampler, borderColor);
2298                 }
2299                 else
2300                 {
2301                         VertexProcessor::setBorderColor(sampler, borderColor);
2302                 }
2303         }
2304
2305         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2306         {
2307                 if(type == SAMPLER_PIXEL)
2308                 {
2309                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2310                 }
2311                 else
2312                 {
2313                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2314                 }
2315         }
2316
2317         void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2318         {
2319                 if(type == SAMPLER_PIXEL)
2320                 {
2321                         PixelProcessor::setSwizzleR(sampler, swizzleR);
2322                 }
2323                 else
2324                 {
2325                         VertexProcessor::setSwizzleR(sampler, swizzleR);
2326                 }
2327         }
2328
2329         void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2330         {
2331                 if(type == SAMPLER_PIXEL)
2332                 {
2333                         PixelProcessor::setSwizzleG(sampler, swizzleG);
2334                 }
2335                 else
2336                 {
2337                         VertexProcessor::setSwizzleG(sampler, swizzleG);
2338                 }
2339         }
2340
2341         void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2342         {
2343                 if(type == SAMPLER_PIXEL)
2344                 {
2345                         PixelProcessor::setSwizzleB(sampler, swizzleB);
2346                 }
2347                 else
2348                 {
2349                         VertexProcessor::setSwizzleB(sampler, swizzleB);
2350                 }
2351         }
2352
2353         void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2354         {
2355                 if(type == SAMPLER_PIXEL)
2356                 {
2357                         PixelProcessor::setSwizzleA(sampler, swizzleA);
2358                 }
2359                 else
2360                 {
2361                         VertexProcessor::setSwizzleA(sampler, swizzleA);
2362                 }
2363         }
2364
2365         void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2366         {
2367                 if(type == SAMPLER_PIXEL)
2368                 {
2369                         PixelProcessor::setBaseLevel(sampler, baseLevel);
2370                 }
2371                 else
2372                 {
2373                         VertexProcessor::setBaseLevel(sampler, baseLevel);
2374                 }
2375         }
2376
2377         void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2378         {
2379                 if(type == SAMPLER_PIXEL)
2380                 {
2381                         PixelProcessor::setMaxLevel(sampler, maxLevel);
2382                 }
2383                 else
2384                 {
2385                         VertexProcessor::setMaxLevel(sampler, maxLevel);
2386                 }
2387         }
2388
2389         void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2390         {
2391                 if(type == SAMPLER_PIXEL)
2392                 {
2393                         PixelProcessor::setMinLod(sampler, minLod);
2394                 }
2395                 else
2396                 {
2397                         VertexProcessor::setMinLod(sampler, minLod);
2398                 }
2399         }
2400
2401         void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2402         {
2403                 if(type == SAMPLER_PIXEL)
2404                 {
2405                         PixelProcessor::setMaxLod(sampler, maxLod);
2406                 }
2407                 else
2408                 {
2409                         VertexProcessor::setMaxLod(sampler, maxLod);
2410                 }
2411         }
2412
2413         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2414         {
2415                 context->setPointSpriteEnable(pointSpriteEnable);
2416         }
2417
2418         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2419         {
2420                 context->setPointScaleEnable(pointScaleEnable);
2421         }
2422
2423         void Renderer::setLineWidth(float width)
2424         {
2425                 context->lineWidth = width;
2426         }
2427
2428         void Renderer::setDepthBias(float bias)
2429         {
2430                 depthBias = bias;
2431         }
2432
2433         void Renderer::setSlopeDepthBias(float slopeBias)
2434         {
2435                 slopeDepthBias = slopeBias;
2436         }
2437
2438         void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2439         {
2440                 context->rasterizerDiscard = rasterizerDiscard;
2441         }
2442
2443         void Renderer::setPixelShader(const PixelShader *shader)
2444         {
2445                 context->pixelShader = shader;
2446
2447                 loadConstants(shader);
2448         }
2449
2450         void Renderer::setVertexShader(const VertexShader *shader)
2451         {
2452                 context->vertexShader = shader;
2453
2454                 loadConstants(shader);
2455         }
2456
2457         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2458         {
2459                 for(int i = 0; i < DRAW_COUNT; i++)
2460                 {
2461                         if(drawCall[i]->psDirtyConstF < index + count)
2462                         {
2463                                 drawCall[i]->psDirtyConstF = index + count;
2464                         }
2465                 }
2466
2467                 for(int i = 0; i < count; i++)
2468                 {
2469                         PixelProcessor::setFloatConstant(index + i, value);
2470                         value += 4;
2471                 }
2472         }
2473
2474         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2475         {
2476                 for(int i = 0; i < DRAW_COUNT; i++)
2477                 {
2478                         if(drawCall[i]->psDirtyConstI < index + count)
2479                         {
2480                                 drawCall[i]->psDirtyConstI = index + count;
2481                         }
2482                 }
2483
2484                 for(int i = 0; i < count; i++)
2485                 {
2486                         PixelProcessor::setIntegerConstant(index + i, value);
2487                         value += 4;
2488                 }
2489         }
2490
2491         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2492         {
2493                 for(int i = 0; i < DRAW_COUNT; i++)
2494                 {
2495                         if(drawCall[i]->psDirtyConstB < index + count)
2496                         {
2497                                 drawCall[i]->psDirtyConstB = index + count;
2498                         }
2499                 }
2500
2501                 for(int i = 0; i < count; i++)
2502                 {
2503                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2504                         boolean++;
2505                 }
2506         }
2507
2508         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2509         {
2510                 for(int i = 0; i < DRAW_COUNT; i++)
2511                 {
2512                         if(drawCall[i]->vsDirtyConstF < index + count)
2513                         {
2514                                 drawCall[i]->vsDirtyConstF = index + count;
2515                         }
2516                 }
2517
2518                 for(int i = 0; i < count; i++)
2519                 {
2520                         VertexProcessor::setFloatConstant(index + i, value);
2521                         value += 4;
2522                 }
2523         }
2524
2525         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2526         {
2527                 for(int i = 0; i < DRAW_COUNT; i++)
2528                 {
2529                         if(drawCall[i]->vsDirtyConstI < index + count)
2530                         {
2531                                 drawCall[i]->vsDirtyConstI = index + count;
2532                         }
2533                 }
2534
2535                 for(int i = 0; i < count; i++)
2536                 {
2537                         VertexProcessor::setIntegerConstant(index + i, value);
2538                         value += 4;
2539                 }
2540         }
2541
2542         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2543         {
2544                 for(int i = 0; i < DRAW_COUNT; i++)
2545                 {
2546                         if(drawCall[i]->vsDirtyConstB < index + count)
2547                         {
2548                                 drawCall[i]->vsDirtyConstB = index + count;
2549                         }
2550                 }
2551
2552                 for(int i = 0; i < count; i++)
2553                 {
2554                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2555                         boolean++;
2556                 }
2557         }
2558
2559         void Renderer::setModelMatrix(const Matrix &M, int i)
2560         {
2561                 VertexProcessor::setModelMatrix(M, i);
2562         }
2563
2564         void Renderer::setViewMatrix(const Matrix &V)
2565         {
2566                 VertexProcessor::setViewMatrix(V);
2567                 updateClipPlanes = true;
2568         }
2569
2570         void Renderer::setBaseMatrix(const Matrix &B)
2571         {
2572                 VertexProcessor::setBaseMatrix(B);
2573                 updateClipPlanes = true;
2574         }
2575
2576         void Renderer::setProjectionMatrix(const Matrix &P)
2577         {
2578                 VertexProcessor::setProjectionMatrix(P);
2579                 updateClipPlanes = true;
2580         }
2581
2582         void Renderer::addQuery(Query *query)
2583         {
2584                 queries.push_back(query);
2585         }
2586
2587         void Renderer::removeQuery(Query *query)
2588         {
2589                 queries.remove(query);
2590         }
2591
2592         #if PERF_HUD
2593                 int Renderer::getThreadCount()
2594                 {
2595                         return threadCount;
2596                 }
2597
2598                 int64_t Renderer::getVertexTime(int thread)
2599                 {
2600                         return vertexTime[thread];
2601                 }
2602
2603                 int64_t Renderer::getSetupTime(int thread)
2604                 {
2605                         return setupTime[thread];
2606                 }
2607
2608                 int64_t Renderer::getPixelTime(int thread)
2609                 {
2610                         return pixelTime[thread];
2611                 }
2612
2613                 void Renderer::resetTimers()
2614                 {
2615                         for(int thread = 0; thread < threadCount; thread++)
2616                         {
2617                                 vertexTime[thread] = 0;
2618                                 setupTime[thread] = 0;
2619                                 pixelTime[thread] = 0;
2620                         }
2621                 }
2622         #endif
2623
2624         void Renderer::setViewport(const Viewport &viewport)
2625         {
2626                 this->viewport = viewport;
2627         }
2628
2629         void Renderer::setScissor(const Rect &scissor)
2630         {
2631                 this->scissor = scissor;
2632         }
2633
2634         void Renderer::setClipFlags(int flags)
2635         {
2636                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2637         }
2638
2639         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2640         {
2641                 if(index < MAX_CLIP_PLANES)
2642                 {
2643                         userPlane[index] = plane;
2644                 }
2645                 else ASSERT(false);
2646
2647                 updateClipPlanes = true;
2648         }
2649
2650         void Renderer::updateConfiguration(bool initialUpdate)
2651         {
2652                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2653
2654                 if(newConfiguration || initialUpdate)
2655                 {
2656                         terminateThreads();
2657
2658                         SwiftConfig::Configuration configuration = {};
2659                         swiftConfig->getConfiguration(configuration);
2660
2661                         precacheVertex = !newConfiguration && configuration.precache;
2662                         precacheSetup = !newConfiguration && configuration.precache;
2663                         precachePixel = !newConfiguration && configuration.precache;
2664
2665                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2666                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2667                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2668
2669                         switch(configuration.textureSampleQuality)
2670                         {
2671                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2672                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2673                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2674                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2675                         }
2676
2677                         switch(configuration.mipmapQuality)
2678                         {
2679                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2680                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2681                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2682                         }
2683
2684                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2685
2686                         switch(configuration.transcendentalPrecision)
2687                         {
2688                         case 0:
2689                                 logPrecision = APPROXIMATE;
2690                                 expPrecision = APPROXIMATE;
2691                                 rcpPrecision = APPROXIMATE;
2692                                 rsqPrecision = APPROXIMATE;
2693                                 break;
2694                         case 1:
2695                                 logPrecision = PARTIAL;
2696                                 expPrecision = PARTIAL;
2697                                 rcpPrecision = PARTIAL;
2698                                 rsqPrecision = PARTIAL;
2699                                 break;
2700                         case 2:
2701                                 logPrecision = ACCURATE;
2702                                 expPrecision = ACCURATE;
2703                                 rcpPrecision = ACCURATE;
2704                                 rsqPrecision = ACCURATE;
2705                                 break;
2706                         case 3:
2707                                 logPrecision = WHQL;
2708                                 expPrecision = WHQL;
2709                                 rcpPrecision = WHQL;
2710                                 rsqPrecision = WHQL;
2711                                 break;
2712                         case 4:
2713                                 logPrecision = IEEE;
2714                                 expPrecision = IEEE;
2715                                 rcpPrecision = IEEE;
2716                                 rsqPrecision = IEEE;
2717                                 break;
2718                         default:
2719                                 logPrecision = ACCURATE;
2720                                 expPrecision = ACCURATE;
2721                                 rcpPrecision = ACCURATE;
2722                                 rsqPrecision = ACCURATE;
2723                                 break;
2724                         }
2725
2726                         switch(configuration.transparencyAntialiasing)
2727                         {
2728                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2729                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2730                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2731                         }
2732
2733                         switch(configuration.threadCount)
2734                         {
2735                         case -1: threadCount = CPUID::coreCount();        break;
2736                         case 0:  threadCount = CPUID::processAffinity();  break;
2737                         default: threadCount = configuration.threadCount; break;
2738                         }
2739
2740                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2741                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2742                         CPUID::setEnableSSE3(configuration.enableSSE3);
2743                         CPUID::setEnableSSE2(configuration.enableSSE2);
2744                         CPUID::setEnableSSE(configuration.enableSSE);
2745
2746                         for(int pass = 0; pass < 10; pass++)
2747                         {
2748                                 optimization[pass] = configuration.optimization[pass];
2749                         }
2750
2751                         forceWindowed = configuration.forceWindowed;
2752                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2753                         postBlendSRGB = configuration.postBlendSRGB;
2754                         exactColorRounding = configuration.exactColorRounding;
2755                         forceClearRegisters = configuration.forceClearRegisters;
2756
2757                 #ifndef NDEBUG
2758                         minPrimitives = configuration.minPrimitives;
2759                         maxPrimitives = configuration.maxPrimitives;
2760                 #endif
2761                 }
2762
2763                 if(!initialUpdate && !worker[0])
2764                 {
2765                         initializeThreads();
2766                 }
2767         }
2768 }