OSDN Git Service

glBlitFramebuffer support for depth/stencil formats
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Renderer.hpp"
16
17 #include "Clipper.hpp"
18 #include "Math.hpp"
19 #include "FrameBuffer.hpp"
20 #include "Timer.hpp"
21 #include "Surface.hpp"
22 #include "Half.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
27 #include "CPUID.hpp"
28 #include "Memory.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
31 #include "Debug.hpp"
32 #include "Reactor/Reactor.hpp"
33
34 #undef max
35
36 bool disableServer = true;   // Handed to the SwiftConfig constructor when a Renderer is created
37
38 #ifndef NDEBUG
39 unsigned int minPrimitives = 1;   // Debug builds only: Renderer::draw() returns immediately when count is below this
40 unsigned int maxPrimitives = 1 << 21;   // Debug builds only: Renderer::draw() returns immediately when count is above this
41 #endif
42
43 namespace sw
44 {
45         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47         extern bool booleanFaceRegister;   // Taken from Conventions by the Renderer constructor
48         extern bool fullPixelPositionRegister;   // Taken from Conventions by the Renderer constructor
49         extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
50         extern bool secondaryColor;             // Specular lighting is applied after texturing
51
52         extern bool forceWindowed;
53         extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near
54         extern bool postBlendSRGB;
55         extern bool exactColorRounding;   // Set from the Renderer constructor argument of the same name
56         extern TransparencyAntialiasing transparencyAntialiasing;
57         extern bool forceClearRegisters;
58
59         extern bool precacheVertex;
60         extern bool precacheSetup;
61         extern bool precachePixel;
62
63         int batchSize = 128;   // Renderer::draw() divides this by the multisample count to size its batches
64         int threadCount = 1;   // Renderer::draw() runs the task loop on the calling thread unless this exceeds 1
65         int unitCount = 1;
66         int clusterCount = 1;   // Number of per-cluster counters Renderer::draw() resets
67
68         TranscendentalPrecision logPrecision = ACCURATE;
69         TranscendentalPrecision expPrecision = ACCURATE;
70         TranscendentalPrecision rcpPrecision = ACCURATE;
71         TranscendentalPrecision rsqPrecision = ACCURATE;
72         bool perspectiveCorrection = true;
73
74         struct Parameters   // Reaches Renderer::threadFunction() through its void* argument
75         {
76                 Renderer *renderer;   // First field threadFunction() reads back
77                 int threadIndex;
78         };
79
80         DrawCall::DrawCall()
81         {
82                 queries = 0;
83
84                 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
85                 vsDirtyConstI = 16;
86                 vsDirtyConstB = 16;
87
88                 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
89                 psDirtyConstI = 16;
90                 psDirtyConstB = 16;
91
92                 references = -1;
93
94                 data = (DrawData*)allocate(sizeof(DrawData));
95                 data->constants = &constants;
96         }
97
98         DrawCall::~DrawCall()
99         {
100                 delete queries;
101
102                 deallocate(data);
103         }
104
105         Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
106         {
107                 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
108                 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
109                 sw::booleanFaceRegister = conventions.booleanFaceRegister;
110                 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
111                 sw::leadingVertexFirst = conventions.leadingVertexFirst;
112                 sw::secondaryColor = conventions.secondaryColor;
113                 sw::exactColorRounding = exactColorRounding;
114
115                 setRenderTarget(0, 0);
116                 clipper = new Clipper(symmetricNormalizedDepth);
117
118                 updateViewMatrix = true;
119                 updateBaseMatrix = true;
120                 updateProjectionMatrix = true;
121                 updateClipPlanes = true;
122
123                 #if PERF_HUD
124                         resetTimers();
125                 #endif
126
127                 for(int i = 0; i < 16; i++)
128                 {
129                         vertexTask[i] = 0;
130
131                         worker[i] = 0;
132                         resume[i] = 0;
133                         suspend[i] = 0;
134                 }
135
136                 threadsAwake = 0;
137                 resumeApp = new Event();
138
139                 currentDraw = 0;
140                 nextDraw = 0;
141
142                 qHead = 0;
143                 qSize = 0;
144
145                 for(int i = 0; i < 16; i++)
146                 {
147                         triangleBatch[i] = 0;
148                         primitiveBatch[i] = 0;
149                 }
150
151                 for(int draw = 0; draw < DRAW_COUNT; draw++)
152                 {
153                         drawCall[draw] = new DrawCall();
154                         drawList[draw] = drawCall[draw];
155                 }
156
157                 for(int unit = 0; unit < 16; unit++)
158                 {
159                         primitiveProgress[unit].init();
160                 }
161
162                 for(int cluster = 0; cluster < 16; cluster++)
163                 {
164                         pixelProgress[cluster].init();
165                 }
166
167                 clipFlags = 0;
168
169                 swiftConfig = new SwiftConfig(disableServer);
170                 updateConfiguration(true);
171
172                 sync = new Resource(0);
173         }
174
175         Renderer::~Renderer()
176         {
177                 sync->destruct();
178
179                 delete clipper;
180                 clipper = 0;
181
182                 terminateThreads();
183                 delete resumeApp;
184
185                 for(int draw = 0; draw < DRAW_COUNT; draw++)
186                 {
187                         delete drawCall[draw];
188                 }
189
190                 delete swiftConfig;
191         }
192
193         // This object has to be mem aligned
194         void* Renderer::operator new(size_t size)\r
195         {\r
196                 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class\r
197                 return sw::allocate(sizeof(Renderer), 16);\r
198         }\r
199 \r
200         void Renderer::operator delete(void * mem)\r
201         {\r
202                 sw::deallocate(mem);\r
203         }
204
205         void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
206         {
                    // Forwarding wrapper: every argument is passed unchanged to the blitter's clear()
207                 blitter.clear(pixel, format, dest, dRect, rgbaMask);
208         }
209
210         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
211         {
                    // Forwarding wrapper: every argument, including the isStencil flag, is passed unchanged to the blitter's blit()
212                 blitter.blit(source, sRect, dest, dRect, filter, isStencil);
213         }
214
215         void Renderer::blit3D(Surface *source, Surface *dest)
216         {
                    // Forwarding wrapper: both surfaces are passed unchanged to the blitter's blit3D()
217                 blitter.blit3D(source, dest);
218         }
219
220         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
221         {
                    // Queues a draw of 'count' primitives of type 'drawType'.
                    //
                    // For each supersample pass this refreshes the processor states and routines when needed,
                    // claims a free DrawCall slot, copies the context's inputs, textures, shader constants,
                    // viewport, render targets and scissor into that slot's DrawData, and finally either wakes
                    // the worker threads or runs the task loop on the calling thread.
                    //
                    // drawType    - stored in the context and in the claimed DrawCall
                    // indexOffset - added to the locked index buffer pointer (an unsigned char*, so a byte offset)
                    // count       - primitive count; also determines the number of batches
                    // update      - when true, states and routines are rebuilt even if the sample mask is unchanged
222                 #ifndef NDEBUG
                            // Debug-only filter on the primitive count
223                         if(count < minPrimitives || count > maxPrimitives)
224                         {
225                                 return;
226                         }
227                 #endif
228
229                 context->drawType = drawType;
230
231                 updateConfiguration();
232                 updateClipper();
233
234                 int ss = context->getSuperSampleCount();
235                 int ms = context->getMultiSampleCount();
236
                    // One pass per supersample; q is the pass index
237                 for(int q = 0; q < ss; q++)
238                 {
239                         unsigned int oldMultiSampleMask = context->multiSampleMask;
                            // Select the ms-bit slice of the sample mask that belongs to pass q
240                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
241
                            // Nothing to render in this pass when all of its samples are masked out
242                         if(!context->multiSampleMask)
243                         {
244                                 continue;
245                         }
246
247                         sync->lock(sw::PRIVATE);
248
                            // Rebuild states and routines on request or when the multisample mask changed
249                         if(update || oldMultiSampleMask != context->multiSampleMask)
250                         {
251                                 vertexState = VertexProcessor::update(drawType);
252                                 setupState = SetupProcessor::update();
253                                 pixelState = PixelProcessor::update();
254
255                                 vertexRoutine = VertexProcessor::routine(vertexState);
256                                 setupRoutine = SetupProcessor::routine(setupState);
257                                 pixelRoutine = PixelProcessor::routine(pixelState);
258                         }
259
260                         int batch = batchSize / ms;
261
                            // Member function used to set up primitives for this draw, chosen by primitive kind and fill mode
262                         int (Renderer::*setupPrimitives)(int batch, int count);
263
264                         if(context->isDrawTriangle())
265                         {
266                                 switch(context->fillMode)
267                                 {
268                                 case FILL_SOLID:
269                                         setupPrimitives = &Renderer::setupSolidTriangles;
270                                         break;
271                                 case FILL_WIREFRAME:
272                                         setupPrimitives = &Renderer::setupWireframeTriangle;
273                                         batch = 1;
274                                         break;
275                                 case FILL_VERTEX:
276                                         setupPrimitives = &Renderer::setupVertexTriangle;
277                                         batch = 1;
278                                         break;
279                                 default:
                                            // NOTE(review): this path returns after sync->lock(sw::PRIVATE) above with no
                                            // matching unlock in this function - confirm the lock is released elsewhere
280                                         ASSERT(false);
281                                         return;
282                                 }
283                         }
284                         else if(context->isDrawLine())
285                         {
286                                 setupPrimitives = &Renderer::setupLines;
287                         }
288                         else   // Point draw
289                         {
290                                 setupPrimitives = &Renderer::setupPoints;
291                         }
292
293                         DrawCall *draw = 0;
294
                            // Claim a free DrawCall slot (references == -1); wait on resumeApp while none is available
295                         do
296                         {
297                                 for(int i = 0; i < DRAW_COUNT; i++)
298                                 {
299                                         if(drawCall[i]->references == -1)
300                                         {
301                                                 draw = drawCall[i];
302                                                 drawList[nextDraw % DRAW_COUNT] = draw;
303
304                                                 break;
305                                         }
306                                 }
307
308                                 if(!draw)
309                                 {
310                                         resumeApp->wait();
311                                 }
312                         }
313                         while(!draw);
314
315                         DrawData *data = draw->data;
316
                            // Attach the active queries to this draw, taking a reference on each one
317                         if(queries.size() != 0)
318                         {
319                                 draw->queries = new std::list<Query*>();
                                    // Primitives-written queries are only attached when transform feedback and its query are both enabled
320                                 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
321                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
322                                 {
                                            // NOTE: this 'q' shadows the supersample pass index 'q' inside this loop body
323                                         Query* q = *query;
324                                         if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
325                                         {
326                                                 atomicIncrement(&(q->reference));
327                                                 draw->queries->push_back(q);
328                                         }
329                                 }
330                         }
331
332                         draw->drawType = drawType;
333                         draw->batchSize = batch;
334
335                         vertexRoutine->bind();
336                         setupRoutine->bind();
337                         pixelRoutine->bind();
338
                            // Record the routines and their entry points in the draw call
339                         draw->vertexRoutine = vertexRoutine;
340                         draw->setupRoutine = setupRoutine;
341                         draw->pixelRoutine = pixelRoutine;
342                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
343                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
344                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
345                         draw->setupPrimitives = setupPrimitives;
346                         draw->setupState = setupState;
347
                            // Vertex streams: copy buffer pointer and stride, and lock each stream resource that exists
348                         for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
349                         {
350                                 draw->vertexStream[i] = context->input[i].resource;
351                                 data->input[i] = context->input[i].buffer;
352                                 data->stride[i] = context->input[i].stride;
353
354                                 if(draw->vertexStream[i])
355                                 {
356                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
357                                 }
358                         }
359
360                         if(context->indexBuffer)
361                         {
362                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
363                         }
364
365                         draw->indexBuffer = context->indexBuffer;
366
                            // Start with no textures bound; the pixel and vertex sampler loops below fill in the used ones
367                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
368                         {
369                                 draw->texture[sampler] = 0;
370                         }
371
372                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
373                         {
374                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
375                                 {
376                                         draw->texture[sampler] = context->texture[sampler];
377                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
378
379                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
380                                 }
381                         }
382
                            // Pixel shader constants: copy the dirty counts into DrawData, then zero the counts
383                         if(context->pixelShader)
384                         {
385                                 if(draw->psDirtyConstF)
386                                 {
                                            // The cW copy is capped at 8 entries
387                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
388                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
389                                         draw->psDirtyConstF = 0;
390                                 }
391
392                                 if(draw->psDirtyConstI)
393                                 {
394                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
395                                         draw->psDirtyConstI = 0;
396                                 }
397
398                                 if(draw->psDirtyConstB)
399                                 {
400                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
401                                         draw->psDirtyConstB = 0;
402                                 }
403
404                                 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
405                         }
406                         else
407                         {
408                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
409                                 {
410                                         draw->pUniformBuffers[i] = nullptr;
411                                 }
412                         }
413
                            // Texture stage uniforms are only copied for pixel shader versions up to 0x0104;
                            // without a pixel shader, copying stops at the first disabled stage
414                         if(context->pixelShaderVersion() <= 0x0104)
415                         {
416                                 for(int stage = 0; stage < 8; stage++)
417                                 {
418                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
419                                         {
420                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
421                                         }
422                                         else break;
423                                 }
424                         }
425
426                         if(context->vertexShader)
427                         {
                                    // Vertex textures are only bound for vertex shader version 0x0300 and up;
                                    // they occupy the texture slots after the pixel sampler range
428                                 if(context->vertexShader->getVersion() >= 0x0300)
429                                 {
430                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
431                                         {
432                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
433                                                 {
434                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
435                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
436
437                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
438                                                 }
439                                         }
440                                 }
441
                                    // Vertex shader constants: same copy-then-zero scheme as the pixel shader constants
442                                 if(draw->vsDirtyConstF)
443                                 {
444                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
445                                         draw->vsDirtyConstF = 0;
446                                 }
447
448                                 if(draw->vsDirtyConstI)
449                                 {
450                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
451                                         draw->vsDirtyConstI = 0;
452                                 }
453
454                                 if(draw->vsDirtyConstB)
455                                 {
456                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
457                                         draw->vsDirtyConstB = 0;
458                                 }
459
460                                 if(context->vertexShader->isInstanceIdDeclared())
461                                 {
462                                         data->instanceID = context->instanceID;
463                                 }
464
465                                 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
466                                 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
467                         }
468                         else
469                         {
                                    // No vertex shader: copy the ff state and reset the vertex constant counts
                                    // to the same values the DrawCall constructor uses
470                                 data->ff = ff;
471
472                                 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
473                                 draw->vsDirtyConstI = 16;
474                                 draw->vsDirtyConstB = 16;
475
476                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
477                                 {
478                                         draw->vUniformBuffers[i] = nullptr;
479                                 }
480
481                                 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
482                                 {
483                                         draw->transformFeedbackBuffers[i] = nullptr;
484                                 }
485                         }
486
                            // Optional state blocks are copied only when the corresponding feature is active
487                         if(pixelState.stencilActive)
488                         {
489                                 data->stencil[0] = stencil;
490                                 data->stencil[1] = stencilCCW;
491                         }
492
493                         if(pixelState.fogActive)
494                         {
495                                 data->fog = fog;
496                         }
497
498                         if(setupState.isDrawPoint)
499                         {
500                                 data->point = point;
501                         }
502
503                         data->lineWidth = context->lineWidth;
504
505                         data->factor = factor;
506
                            // Alpha-to-coverage: per-sample thresholds spread around the alpha reference,
                            // within a margin equal to its distance from the nearer of 0 and 1
507                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
508                         {
509                                 float ref = context->alphaReference * (1.0f / 255.0f);
510                                 float margin = sw::min(ref, 1.0f - ref);
511
512                                 if(ms == 4)
513                                 {
514                                         data->a2c0 = replicate(ref - margin * 0.6f);
515                                         data->a2c1 = replicate(ref - margin * 0.2f);
516                                         data->a2c2 = replicate(ref + margin * 0.2f);
517                                         data->a2c3 = replicate(ref + margin * 0.6f);
518                                 }
519                                 else if(ms == 2)
520                                 {
521                                         data->a2c0 = replicate(ref - margin * 0.3f);
522                                         data->a2c1 = replicate(ref + margin * 0.3f);
523                                 }
524                                 else ASSERT(false);
525                         }
526
                            // Reset the per-cluster occlusion counters for this draw
527                         if(pixelState.occlusionEnabled)
528                         {
529                                 for(int cluster = 0; cluster < clusterCount; cluster++)
530                                 {
531                                         data->occlusion[cluster] = 0;
532                                 }
533                         }
534
535                         #if PERF_PROFILE
536                                 for(int cluster = 0; cluster < clusterCount; cluster++)
537                                 {
538                                         for(int i = 0; i < PERF_TIMERS; i++)
539                                         {
540                                                 data->cycles[i][cluster] = 0;
541                                         }
542                                 }
543                         #endif
544
545                         // Viewport
546                         {
                                    // W, H: half extents; X0, Y0: viewport center; N, F: depth bounds; Z: depth range
547                                 float W = 0.5f * viewport.width;
548                                 float H = 0.5f * viewport.height;
549                                 float X0 = viewport.x0 + W;
550                                 float Y0 = viewport.y0 + H;
551                                 float N = viewport.minZ;
552                                 float F = viewport.maxZ;
553                                 float Z = F - N;
554
555                                 if(context->isDrawTriangle(false))
556                                 {
557                                         N += depthBias;
558                                 }
559
560                                 if(complementaryDepthBuffer)
561                                 {
562                                         Z = -Z;
563                                         N = 1 - N;
564                                 }
565
                                    // Offset tables indexed as [log2(supersample count)][pass index]
566                                 static const float X[5][16] =   // Fragment offsets
567                                 {
568                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
569                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
570                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
571                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
572                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
573                                 };
574
575                                 static const float Y[5][16] =   // Fragment offsets
576                                 {
577                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
578                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
579                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
580                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
581                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
582                                 };
583
584                                 int s = sw::log2(ss);
585
                                    // Viewport scale and center are stored multiplied by 16; the center is also offset by -8
586                                 data->Wx16 = replicate(W * 16);
587                                 data->Hx16 = replicate(H * 16);
588                                 data->X0x16 = replicate(X0 * 16 - 8);
589                                 data->Y0x16 = replicate(Y0 * 16 - 8);
                                    // This pass's fragment offset, divided by the viewport half extents
590                                 data->XXXX = replicate(X[s][q] / W);
591                                 data->YYYY = replicate(Y[s][q] / H);
592                                 data->halfPixelX = replicate(0.5f / W);
593                                 data->halfPixelY = replicate(0.5f / H);
594                                 data->viewportHeight = abs(viewport.height);
595                                 data->slopeDepthBias = slopeDepthBias;
596                                 data->depthRange = Z;
597                                 data->depthNear = N;
598                                 draw->clipFlags = clipFlags;
599
                                    // Copy only the user clip planes whose flag is set
600                                 if(clipFlags)
601                                 {
602                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
603                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
604                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
605                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
606                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
607                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
608                                 }
609                         }
610
611                         // Target
612                         {
                                    // Each buffer is locked at slice q * ms, i.e. the first sample of this pass
613                                 for(int index = 0; index < RENDERTARGETS; index++)
614                                 {
615                                         draw->renderTarget[index] = context->renderTarget[index];
616
617                                         if(draw->renderTarget[index])
618                                         {
619                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
620                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
621                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
622                                         }
623                                 }
624
625                                 draw->depthBuffer = context->depthBuffer;
626                                 draw->stencilBuffer = context->stencilBuffer;
627
628                                 if(draw->depthBuffer)
629                                 {
630                                         data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
631                                         data->depthPitchB = context->depthBuffer->getInternalPitchB();
632                                         data->depthSliceB = context->depthBuffer->getInternalSliceB();
633                                 }
634
635                                 if(draw->stencilBuffer)
636                                 {
637                                         data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, q * ms, MANAGED);
638                                         data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
639                                         data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
640                                 }
641                         }
642
643                         // Scissor
644                         {
645                                 data->scissorX0 = scissor.x0;
646                                 data->scissorX1 = scissor.x1;
647                                 data->scissorY0 = scissor.y0;
648                                 data->scissorY1 = scissor.y1;
649                         }
650
651                         draw->primitive = 0;
652                         draw->count = count;
653
                            // Number of batches needed for 'count' primitives, rounded up; this also
                            // moves the slot away from the -1 "free" value tested above
654                         draw->references = (count + batch - 1) / batch;
655
                            // Publish the draw: nextDraw is advanced under the scheduler mutex
656                         schedulerMutex.lock();
657                         nextDraw++;
658                         schedulerMutex.unlock();
659
660                         if(threadCount > 1)
661                         {
                                    // Wake thread 0 with a RESUME task if no threads are currently awake
662                                 if(!threadsAwake)
663                                 {
664                                         suspend[0]->wait();
665
666                                         threadsAwake = 1;
667                                         task[0].type = Task::RESUME;
668
669                                         resume[0]->signal();
670                                 }
671                         }
672                         else   // Use main thread for draw execution
673                         {
674                                 threadsAwake = 1;
675                                 task[0].type = Task::RESUME;
676
677                                 taskLoop(0);
678                         }
679                 }
680         }
681
682         void Renderer::threadFunction(void *parameters)
683         {
                    // Unpacks the Parameters struct passed through the untyped 'parameters'
                    // pointer and runs that renderer's threadLoop() for the given thread index.
                    // NOTE(review): presumably the entry point handed to each worker thread -
                    // confirm where the threads are created.
684                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
685                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
686
                    // When logPrecision is below IEEE, switch on flush-to-zero and
                    // denormals-are-zero before entering the thread loop.
687                 if(logPrecision < IEEE)
688                 {
689                         CPUID::setFlushToZero(true);
690                         CPUID::setDenormalsAreZero(true);
691                 }
692
                    // Does not return until threadLoop() returns.
693                 renderer->threadLoop(threadIndex);
694         }
695
696         void Renderer::threadLoop(int threadIndex)
697         {
                    // Body of a worker thread: keeps running tasks until exitThreads is set.
698                 while(!exitThreads)
699                 {
                            // Returns once this thread's task type has become Task::SUSPEND.
700                         taskLoop(threadIndex);
701
                            // Announce that this thread is idle, then block until resume[threadIndex]
                            // is signalled (scheduleTask() and the draw path wait on suspend[] and
                            // signal resume[] in that order).
702                         suspend[threadIndex]->signal();
703                         resume[threadIndex]->wait();
704                 }
705         }
706
707         void Renderer::taskLoop(int threadIndex)
708         {
                    // Alternates between fetching a task (scheduleTask) and running it
                    // (executeTask) until scheduleTask() leaves task[threadIndex].type set to
                    // Task::SUSPEND. The SUSPEND task itself is still passed to executeTask(),
                    // which does nothing for it.
709                 while(task[threadIndex].type != Task::SUSPEND)
710                 {
711                         scheduleTask(threadIndex);
712                         executeTask(threadIndex);
713                 }
714         }
715
716         void Renderer::findAvailableTasks()
717         {
                    // Appends runnable tasks to taskQueue: first at most one pixel task per idle
                    // cluster, then at most one primitive task per free primitive unit.
                    // scheduleTask() calls this while holding schedulerMutex.
                    // NOTE(review): the queue index wraps at 32 and no overflow check is made
                    // here - presumably unitCount + clusterCount never exceeds 32; confirm.
718                 // Find pixel tasks
719                 for(int cluster = 0; cluster < clusterCount; cluster++)
720                 {
721                         if(!pixelProgress[cluster].executing)
722                         {
723                                 for(int unit = 0; unit < unitCount; unit++)
724                                 {
725                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
726                                         {
727                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
728                                                 {
729                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
730                                                         {
731                                                                 Task &task = taskQueue[qHead];
732                                                                 task.type = Task::PIXELS;
733                                                                 task.primitiveUnit = unit;
734                                                                 task.pixelCluster = cluster;
735
                                                                    // Keeps this cluster from being scheduled again until
                                                                    // finishRendering() clears the flag.
736                                                                 pixelProgress[cluster].executing = true;
737
738                                                                 // Commit to the task queue
739                                                                 qHead = (qHead + 1) % 32;
740                                                                 qSize++;
741
742                                                                 break;   // One pixel task per cluster per scan
743                                                         }
744                                                 }
745                                         }
746                                 }
747                         }
748                 }
749
750                 // Find primitive tasks
751                 if(currentDraw == nextDraw)
752                 {
753                         return;   // No more primitives to process
754                 }
755
756                 for(int unit = 0; unit < unitCount; unit++)
757                 {
758                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
759
                            // Every batch of the current draw call has been handed out: advance
                            // to the next queued draw call, if there is one.
760                         if(draw->primitive >= draw->count)
761                         {
762                                 currentDraw++;
763
764                                 if(currentDraw == nextDraw)
765                                 {
766                                         return;   // No more primitives to process
767                                 }
768
769                                 draw = drawList[currentDraw % DRAW_COUNT];
770                         }
771
772                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
773                         {
774                                 int primitive = draw->primitive;
775                                 int count = draw->count;
776                                 int batch = draw->batchSize;
777
778                                 primitiveProgress[unit].drawCall = currentDraw;
779                                 primitiveProgress[unit].firstPrimitive = primitive;
                                    // A full batch, or whatever is left of the draw call if that is smaller.
780                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
781
782                                 draw->primitive += batch;
783
784                                 Task &task = taskQueue[qHead];
785                                 task.type = Task::PRIMITIVES;
786                                 task.primitiveUnit = unit;
787
                                    // Non-zero, so the unit is not handed out again by this loop, and not
                                    // positive, so the pixel scan above skips it. executeTask() sets it to
                                    // clusterCount once the batch has been set up.
788                                 primitiveProgress[unit].references = -1;
789
790                                 // Commit to the task queue
791                                 qHead = (qHead + 1) % 32;
792                                 qSize++;
793                         }
794                 }
795         }
796
797         void Renderer::scheduleTask(int threadIndex)
798         {
                    // Chooses the next task for thread 'threadIndex' and stores it in
                    // task[threadIndex]. Everything between lock() and unlock() runs under
                    // schedulerMutex. When the queue is empty the thread's task type becomes
                    // Task::SUSPEND, which makes taskLoop() return.
799                 schedulerMutex.lock();
800
                    // Look for new work only when fewer tasks are queued than there are
                    // sleeping threads (threadCount - threadsAwake) plus this one.
801                 if((int)qSize < threadCount - threadsAwake + 1)
802                 {
803                         findAvailableTasks();
804                 }
805
806                 if(qSize != 0)
807                 {
                            // Take the oldest queued entry: qHead is the next slot to be written
                            // and the qSize pending entries sit directly before it.
                            // NOTE(review): when qHead < qSize this relies on the subtraction
                            // wrapping as unsigned arithmetic - confirm the declared types.
808                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
809                         qSize--;
810
811                         if(threadsAwake != threadCount)
812                         {
                                    // Number of sleeping threads to wake for the tasks still queued.
813                                 int wakeup = qSize - threadsAwake + 1;
814
815                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
816                                 {
817                                         if(task[i].type == Task::SUSPEND)
818                                         {
                                                    // Wait for thread i's suspend signal (raised in threadLoop()
                                                    // after its taskLoop() returned) before resuming it.
819                                                 suspend[i]->wait();
820                                                 task[i].type = Task::RESUME;
821                                                 resume[i]->signal();
822
823                                                 threadsAwake++;
824                                                 wakeup--;
825                                         }
826                                 }
827                         }
828                 }
829                 else
830                 {
                            // Nothing to do: mark this thread as suspended.
831                         task[threadIndex].type = Task::SUSPEND;
832
833                         threadsAwake--;
834                 }
835
836                 schedulerMutex.unlock();
837         }
838
839         void Renderer::executeTask(int threadIndex)
840         {
                    // Runs the task currently stored in task[threadIndex]:
                    //   PRIMITIVES - vertex processing and primitive setup for one batch.
                    //   PIXELS     - the draw call's pixel routine for one cluster, followed by
                    //                finishRendering().
                    //   RESUME / SUSPEND - no work.
                    // Any other type trips ASSERT(false). With PERF_HUD enabled, elapsed ticks
                    // are accumulated per thread into vertexTime, setupTime and pixelTime.
841                 #if PERF_HUD
842                         int64_t startTick = Timer::ticks();
843                 #endif
844
845                 switch(task[threadIndex].type)
846                 {
847                 case Task::PRIMITIVES:
848                         {
849                                 int unit = task[threadIndex].primitiveUnit;
850
851                                 int input = primitiveProgress[unit].firstPrimitive;
852                                 int count = primitiveProgress[unit].primitiveCount;
853                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
854                                 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
855
856                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
857
858                                 #if PERF_HUD
859                                         int64_t time = Timer::ticks();
860                                         vertexTime[threadIndex] += time - startTick;
861                                         startTick = time;
862                                 #endif
863
                                    // Stays 0 when rasterizerDiscard is set, so the pixel tasks for this
                                    // batch skip the pixel routine.
864                                 int visible = 0;
865
866                                 if(!draw->setupState.rasterizerDiscard)
867                                 {
868                                         visible = (this->*setupPrimitives)(unit, count);
869                                 }
870
871                                 primitiveProgress[unit].visible = visible;
                                    // Makes the batch available to pixel tasks: findAvailableTasks() only
                                    // picks units with references > 0, and finishRendering() decrements the
                                    // count once per pixel task.
872                                 primitiveProgress[unit].references = clusterCount;
873
874                                 #if PERF_HUD
875                                         setupTime[threadIndex] += Timer::ticks() - startTick;
876                                 #endif
877                         }
878                         break;
879                 case Task::PIXELS:
880                         {
881                                 int unit = task[threadIndex].primitiveUnit;
882                                 int visible = primitiveProgress[unit].visible;
883
884                                 if(visible > 0)
885                                 {
886                                         int cluster = task[threadIndex].pixelCluster;
887                                         Primitive *primitive = primitiveBatch[unit];
888                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
889                                         DrawData *data = draw->data;
890                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
891
892                                         pixelRoutine(primitive, visible, cluster, data);
893                                 }
894
                                    // Called even when nothing was visible, so progress counters and
                                    // reference counts still advance.
895                                 finishRendering(task[threadIndex]);
896
897                                 #if PERF_HUD
898                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
899                                 #endif
900                         }
901                         break;
902                 case Task::RESUME:
903                         break;
904                 case Task::SUSPEND:
905                         break;
906                 default:
907                         ASSERT(false);
908                 }
909         }
910
911         void Renderer::synchronize()
912         {
                    // Takes the sync lock with sw::PUBLIC and releases it straight away.
                    // NOTE(review): presumably this blocks until pending draw calls have released
                    // sync (finishRendering() calls sync->unlock()) - confirm against the lock's
                    // semantics.
913                 sync->lock(sw::PUBLIC);
914                 sync->unlock();
915         }
916
917         void Renderer::finishRendering(Task &pixelTask)
918         {
                    // Bookkeeping after a pixel task has run for (primitive unit, pixel cluster):
                    //   1. Records how far the cluster has progressed through its draw call.
                    //   2. Drops one reference on the primitive unit.
                    //   3. When that count reaches zero, drops one reference on the draw call.
                    //   4. When the draw call's count reaches zero, accumulates query results,
                    //      unlocks every resource held by the draw call, unbinds its routines,
                    //      unlocks sync and signals resumeApp.
                    // Finally clears the cluster's 'executing' flag so findAvailableTasks() can
                    // schedule it again.
919                 int unit = pixelTask.primitiveUnit;
920                 int cluster = pixelTask.pixelCluster;
921
922                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
923                 DrawData &data = *draw.data;
924                 int primitive = primitiveProgress[unit].firstPrimitive;
925                 int count = primitiveProgress[unit].primitiveCount;
926                 int processedPrimitives = primitive + count;
927
928                 pixelProgress[cluster].processedPrimitives = processedPrimitives;
929
                    // The cluster has consumed the whole draw call: move it on to the next one.
930                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
931                 {
932                         pixelProgress[cluster].drawCall++;
933                         pixelProgress[cluster].processedPrimitives = 0;
934                 }
935
                    // The unit's count was set to clusterCount by the PRIMITIVES task in
                    // executeTask(); each finished pixel task removes one.
                    // NOTE(review): the '== 0' tests below assume atomicDecrement() returns the
                    // decremented value - confirm.
936                 int ref = atomicDecrement(&primitiveProgress[unit].references);
937
938                 if(ref == 0)
939                 {
                            // The draw path initialises draw.references to the number of batches,
                            // (count + batch - 1) / batch.
940                         ref = atomicDecrement(&draw.references);
941
942                         if(ref == 0)
943                         {
944                                 #if PERF_PROFILE
945                                         for(int cluster = 0; cluster < clusterCount; cluster++)
946                                         {
947                                                 for(int i = 0; i < PERF_TIMERS; i++)
948                                                 {
949                                                         profiler.cycles[i] += data.cycles[i][cluster];
950                                                 }
951                                         }
952                                 #endif
953
                                    // Fold this draw call's results into each attached query, then drop
                                    // the query's reference and free the list.
954                                 if(draw.queries)
955                                 {
956                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
957                                         {
958                                                 Query *query = *q;
959
960                                                 switch(query->type)
961                                                 {
962                                                 case Query::FRAGMENTS_PASSED:
963                                                         for(int cluster = 0; cluster < clusterCount; cluster++)
964                                                         {
965                                                                 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
966                                                         }
967                                                         break;
968                                                 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
969                                                         atomicAdd((volatile int*)&query->data, processedPrimitives);
970                                                         break;
971                                                 default:
972                                                         break;
973                                                 }
974
975                                                 atomicDecrement(&query->reference);
976                                         }
977
978                                         delete draw.queries;
979                                         draw.queries = 0;
980                                 }
981
                                    // Release the locks taken in the draw path on the color, depth and
                                    // stencil targets.
982                                 for(int i = 0; i < RENDERTARGETS; i++)
983                                 {
984                                         if(draw.renderTarget[i])
985                                         {
986                                                 draw.renderTarget[i]->unlockInternal();
987                                         }
988                                 }
989
990                                 if(draw.depthBuffer)
991                                 {
992                                         draw.depthBuffer->unlockInternal();
993                                 }
994
995                                 if(draw.stencilBuffer)
996                                 {
997                                         draw.stencilBuffer->unlockStencil();
998                                 }
999
                                     // Unlock every other resource referenced by the draw call: textures,
                                     // vertex streams, index buffer, uniform buffers and transform
                                     // feedback buffers.
1000                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
1001                                 {
1002                                         if(draw.texture[i])
1003                                         {
1004                                                 draw.texture[i]->unlock();
1005                                         }
1006                                 }
1007
1008                                 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
1009                                 {
1010                                         if(draw.vertexStream[i])
1011                                         {
1012                                                 draw.vertexStream[i]->unlock();
1013                                         }
1014                                 }
1015
1016                                 if(draw.indexBuffer)
1017                                 {
1018                                         draw.indexBuffer->unlock();
1019                                 }
1020
1021                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1022                                 {
1023                                         if(draw.pUniformBuffers[i])
1024                                         {
1025                                                 draw.pUniformBuffers[i]->unlock();
1026                                         }
1027                                         if(draw.vUniformBuffers[i])
1028                                         {
1029                                                 draw.vUniformBuffers[i]->unlock();
1030                                         }
1031                                 }
1032
1033                                 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1034                                 {
1035                                         if(draw.transformFeedbackBuffers[i])
1036                                         {
1037                                                 draw.transformFeedbackBuffers[i]->unlock();
1038                                         }
1039                                 }
1040
1041                                 draw.vertexRoutine->unbind();
1042                                 draw.setupRoutine->unbind();
1043                                 draw.pixelRoutine->unbind();
1044
1045                                 sync->unlock();
1046
                                     // NOTE(review): -1 presumably marks this draw call slot as free for
                                     // reuse, with resumeApp waking a caller waiting for one - confirm
                                     // against the draw path.
1047                                 draw.references = -1;
1048                                 resumeApp->signal();
1049                         }
1050                 }
1051
                     // Lets findAvailableTasks() hand this cluster its next pixel task.
1052                 pixelProgress[cluster].executing = false;
1053         }
1054
1055         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1056         {
1057                 Triangle *triangle = triangleBatch[unit];
1058                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1059                 DrawData *data = draw->data;
1060                 VertexTask *task = vertexTask[thread];
1061
1062                 const void *indices = data->indices;
1063                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1064
1065                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1066                 {
1067                         task->vertexCache.clear();
1068                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1069                 }
1070
1071                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1072
1073                 switch(draw->drawType)
1074                 {
1075                 case DRAW_POINTLIST:
1076                         {
1077                                 unsigned int index = start;
1078
1079                                 for(unsigned int i = 0; i < triangleCount; i++)
1080                                 {
1081                                         batch[i][0] = index;
1082                                         batch[i][1] = index;
1083                                         batch[i][2] = index;
1084
1085                                         index += 1;
1086                                 }
1087                         }
1088                         break;
1089                 case DRAW_LINELIST:
1090                         {
1091                                 unsigned int index = 2 * start;
1092
1093                                 for(unsigned int i = 0; i < triangleCount; i++)
1094                                 {
1095                                         batch[i][0] = index + 0;
1096                                         batch[i][1] = index + 1;
1097                                         batch[i][2] = index + 1;
1098
1099                                         index += 2;
1100                                 }
1101                         }
1102                         break;
1103                 case DRAW_LINESTRIP:
1104                         {
1105                                 unsigned int index = start;
1106
1107                                 for(unsigned int i = 0; i < triangleCount; i++)
1108                                 {
1109                                         batch[i][0] = index + 0;
1110                                         batch[i][1] = index + 1;
1111                                         batch[i][2] = index + 1;
1112
1113                                         index += 1;
1114                                 }
1115                         }
1116                         break;
1117                 case DRAW_LINELOOP:
1118                         {
1119                                 unsigned int index = start;
1120
1121                                 for(unsigned int i = 0; i < triangleCount; i++)
1122                                 {
1123                                         batch[i][0] = (index + 0) % loop;
1124                                         batch[i][1] = (index + 1) % loop;
1125                                         batch[i][2] = (index + 1) % loop;
1126
1127                                         index += 1;
1128                                 }
1129                         }
1130                         break;
1131                 case DRAW_TRIANGLELIST:
1132                         {
1133                                 unsigned int index = 3 * start;
1134
1135                                 for(unsigned int i = 0; i < triangleCount; i++)
1136                                 {
1137                                         batch[i][0] = index + 0;
1138                                         batch[i][1] = index + 1;
1139                                         batch[i][2] = index + 2;
1140
1141                                         index += 3;
1142                                 }
1143                         }
1144                         break;
1145                 case DRAW_TRIANGLESTRIP:
1146                         {
1147                                 unsigned int index = start;
1148
1149                                 for(unsigned int i = 0; i < triangleCount; i++)
1150                                 {
1151                                         batch[i][0] = index + 0;
1152                                         batch[i][1] = index + (index & 1) + 1;
1153                                         batch[i][2] = index + (~index & 1) + 1;
1154
1155                                         index += 1;
1156                                 }
1157                         }
1158                         break;
1159                 case DRAW_TRIANGLEFAN:
1160                         {
1161                                 unsigned int index = start;
1162
1163                                 for(unsigned int i = 0; i < triangleCount; i++)
1164                                 {
1165                                         batch[i][0] = index + 1;
1166                                         batch[i][1] = index + 2;
1167                                         batch[i][2] = 0;
1168
1169                                         index += 1;
1170                                 }
1171                         }
1172                         break;
1173                 case DRAW_INDEXEDPOINTLIST8:
1174                         {
1175                                 const unsigned char *index = (const unsigned char*)indices + start;
1176
1177                                 for(unsigned int i = 0; i < triangleCount; i++)
1178                                 {
1179                                         batch[i][0] = *index;
1180                                         batch[i][1] = *index;
1181                                         batch[i][2] = *index;
1182
1183                                         index += 1;
1184                                 }
1185                         }
1186                         break;
1187                 case DRAW_INDEXEDPOINTLIST16:
1188                         {
1189                                 const unsigned short *index = (const unsigned short*)indices + start;
1190
1191                                 for(unsigned int i = 0; i < triangleCount; i++)
1192                                 {
1193                                         batch[i][0] = *index;
1194                                         batch[i][1] = *index;
1195                                         batch[i][2] = *index;
1196
1197                                         index += 1;
1198                                 }
1199                         }
1200                         break;
1201                 case DRAW_INDEXEDPOINTLIST32:
1202                         {
1203                                 const unsigned int *index = (const unsigned int*)indices + start;
1204
1205                                 for(unsigned int i = 0; i < triangleCount; i++)
1206                                 {
1207                                         batch[i][0] = *index;
1208                                         batch[i][1] = *index;
1209                                         batch[i][2] = *index;
1210
1211                                         index += 1;
1212                                 }
1213                         }
1214                         break;
1215                 case DRAW_INDEXEDLINELIST8:
1216                         {
1217                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1218
1219                                 for(unsigned int i = 0; i < triangleCount; i++)
1220                                 {
1221                                         batch[i][0] = index[0];
1222                                         batch[i][1] = index[1];
1223                                         batch[i][2] = index[1];
1224
1225                                         index += 2;
1226                                 }
1227                         }
1228                         break;
1229                 case DRAW_INDEXEDLINELIST16:
1230                         {
1231                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1232
1233                                 for(unsigned int i = 0; i < triangleCount; i++)
1234                                 {
1235                                         batch[i][0] = index[0];
1236                                         batch[i][1] = index[1];
1237                                         batch[i][2] = index[1];
1238
1239                                         index += 2;
1240                                 }
1241                         }
1242                         break;
1243                 case DRAW_INDEXEDLINELIST32:
1244                         {
1245                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1246
1247                                 for(unsigned int i = 0; i < triangleCount; i++)
1248                                 {
1249                                         batch[i][0] = index[0];
1250                                         batch[i][1] = index[1];
1251                                         batch[i][2] = index[1];
1252
1253                                         index += 2;
1254                                 }
1255                         }
1256                         break;
1257                 case DRAW_INDEXEDLINESTRIP8:
1258                         {
1259                                 const unsigned char *index = (const unsigned char*)indices + start;
1260
1261                                 for(unsigned int i = 0; i < triangleCount; i++)
1262                                 {
1263                                         batch[i][0] = index[0];
1264                                         batch[i][1] = index[1];
1265                                         batch[i][2] = index[1];
1266
1267                                         index += 1;
1268                                 }
1269                         }
1270                         break;
1271                 case DRAW_INDEXEDLINESTRIP16:
1272                         {
1273                                 const unsigned short *index = (const unsigned short*)indices + start;
1274
1275                                 for(unsigned int i = 0; i < triangleCount; i++)
1276                                 {
1277                                         batch[i][0] = index[0];
1278                                         batch[i][1] = index[1];
1279                                         batch[i][2] = index[1];
1280
1281                                         index += 1;
1282                                 }
1283                         }
1284                         break;
1285                 case DRAW_INDEXEDLINESTRIP32:
1286                         {
1287                                 const unsigned int *index = (const unsigned int*)indices + start;
1288
1289                                 for(unsigned int i = 0; i < triangleCount; i++)
1290                                 {
1291                                         batch[i][0] = index[0];
1292                                         batch[i][1] = index[1];
1293                                         batch[i][2] = index[1];
1294
1295                                         index += 1;
1296                                 }
1297                         }
1298                         break;
1299                 case DRAW_INDEXEDLINELOOP8:
1300                         {
1301                                 const unsigned char *index = (const unsigned char*)indices;
1302
1303                                 for(unsigned int i = 0; i < triangleCount; i++)
1304                                 {
1305                                         batch[i][0] = index[(start + i + 0) % loop];
1306                                         batch[i][1] = index[(start + i + 1) % loop];
1307                                         batch[i][2] = index[(start + i + 1) % loop];
1308                                 }
1309                         }
1310                         break;
1311                 case DRAW_INDEXEDLINELOOP16:
1312                         {
1313                                 const unsigned short *index = (const unsigned short*)indices;
1314
1315                                 for(unsigned int i = 0; i < triangleCount; i++)
1316                                 {
1317                                         batch[i][0] = index[(start + i + 0) % loop];
1318                                         batch[i][1] = index[(start + i + 1) % loop];
1319                                         batch[i][2] = index[(start + i + 1) % loop];
1320                                 }
1321                         }
1322                         break;
1323                 case DRAW_INDEXEDLINELOOP32:
1324                         {
1325                                 const unsigned int *index = (const unsigned int*)indices;
1326
1327                                 for(unsigned int i = 0; i < triangleCount; i++)
1328                                 {
1329                                         batch[i][0] = index[(start + i + 0) % loop];
1330                                         batch[i][1] = index[(start + i + 1) % loop];
1331                                         batch[i][2] = index[(start + i + 1) % loop];
1332                                 }
1333                         }
1334                         break;
1335                 case DRAW_INDEXEDTRIANGLELIST8:
1336                         {
1337                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1338
1339                                 for(unsigned int i = 0; i < triangleCount; i++)
1340                                 {
1341                                         batch[i][0] = index[0];
1342                                         batch[i][1] = index[1];
1343                                         batch[i][2] = index[2];
1344
1345                                         index += 3;
1346                                 }
1347                         }
1348                         break;
1349                 case DRAW_INDEXEDTRIANGLELIST16:
1350                         {
1351                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1352
1353                                 for(unsigned int i = 0; i < triangleCount; i++)
1354                                 {
1355                                         batch[i][0] = index[0];
1356                                         batch[i][1] = index[1];
1357                                         batch[i][2] = index[2];
1358
1359                                         index += 3;
1360                                 }
1361                         }
1362                         break;
1363                 case DRAW_INDEXEDTRIANGLELIST32:
1364                         {
1365                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1366
1367                                 for(unsigned int i = 0; i < triangleCount; i++)
1368                                 {
1369                                         batch[i][0] = index[0];
1370                                         batch[i][1] = index[1];
1371                                         batch[i][2] = index[2];
1372
1373                                         index += 3;
1374                                 }
1375                         }
1376                         break;
1377                 case DRAW_INDEXEDTRIANGLESTRIP8:
1378                         {
1379                                 const unsigned char *index = (const unsigned char*)indices + start;
1380
1381                                 for(unsigned int i = 0; i < triangleCount; i++)
1382                                 {
1383                                         batch[i][0] = index[0];
1384                                         batch[i][1] = index[((start + i) & 1) + 1];
1385                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1386
1387                                         index += 1;
1388                                 }
1389                         }
1390                         break;
1391                 case DRAW_INDEXEDTRIANGLESTRIP16:
1392                         {
1393                                 const unsigned short *index = (const unsigned short*)indices + start;
1394
1395                                 for(unsigned int i = 0; i < triangleCount; i++)
1396                                 {
1397                                         batch[i][0] = index[0];
1398                                         batch[i][1] = index[((start + i) & 1) + 1];
1399                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1400
1401                                         index += 1;
1402                                 }
1403                         }
1404                         break;
1405                 case DRAW_INDEXEDTRIANGLESTRIP32:
1406                         {
1407                                 const unsigned int *index = (const unsigned int*)indices + start;
1408
1409                                 for(unsigned int i = 0; i < triangleCount; i++)
1410                                 {
1411                                         batch[i][0] = index[0];
1412                                         batch[i][1] = index[((start + i) & 1) + 1];
1413                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1414
1415                                         index += 1;
1416                                 }
1417                         }
1418                         break;
1419                 case DRAW_INDEXEDTRIANGLEFAN8:
1420                         {
1421                                 const unsigned char *index = (const unsigned char*)indices;
1422
1423                                 for(unsigned int i = 0; i < triangleCount; i++)
1424                                 {
1425                                         batch[i][0] = index[start + i + 1];
1426                                         batch[i][1] = index[start + i + 2];
1427                                         batch[i][2] = index[0];
1428                                 }
1429                         }
1430                         break;
1431                 case DRAW_INDEXEDTRIANGLEFAN16:
1432                         {
1433                                 const unsigned short *index = (const unsigned short*)indices;
1434
1435                                 for(unsigned int i = 0; i < triangleCount; i++)
1436                                 {
1437                                         batch[i][0] = index[start + i + 1];
1438                                         batch[i][1] = index[start + i + 2];
1439                                         batch[i][2] = index[0];
1440                                 }
1441                         }
1442                         break;
1443                 case DRAW_INDEXEDTRIANGLEFAN32:
1444                         {
1445                                 const unsigned int *index = (const unsigned int*)indices;
1446
1447                                 for(unsigned int i = 0; i < triangleCount; i++)
1448                                 {
1449                                         batch[i][0] = index[start + i + 1];
1450                                         batch[i][1] = index[start + i + 2];
1451                                         batch[i][2] = index[0];
1452                                 }
1453                         }
1454                         break;
1455                 case DRAW_QUADLIST:
1456                         {
1457                                 unsigned int index = 4 * start / 2;
1458
1459                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1460                                 {
1461                                         batch[i+0][0] = index + 0;
1462                                         batch[i+0][1] = index + 1;
1463                                         batch[i+0][2] = index + 2;
1464
1465                                         batch[i+1][0] = index + 0;
1466                                         batch[i+1][1] = index + 2;
1467                                         batch[i+1][2] = index + 3;
1468
1469                                         index += 4;
1470                                 }
1471                         }
1472                         break;
1473                 default:
1474                         ASSERT(false);
1475                         return;
1476                 }
1477
1478                 task->primitiveStart = start;
1479                 task->vertexCount = triangleCount * 3;
1480                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1481         }
1482
1483         int Renderer::setupSolidTriangles(int unit, int count)
1484         {
                     // Runs the draw call's setup routine on the first 'count' triangles of
                     // triangleBatch[unit], writing the results to primitiveBatch[unit].
                     // Returns the number of triangles for which the setup routine returned true.
1485                 Triangle *triangle = triangleBatch[unit];
1486                 Primitive *primitive = primitiveBatch[unit];
1487
                     // The draw call is looked up from this unit's progress record, wrapped to the size of drawList
1488                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1489                 SetupProcessor::State &state = draw.setupState;
1490                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1491
1492                 int ms = state.multiSample;
1493                 int pos = state.positionRegister;
1494                 const DrawData *data = draw.data;
1495                 int visible = 0;
1496
1497                 for(int i = 0; i < count; i++, triangle++)
1498                 {
1499                         Vertex &v0 = triangle->v0;
1500                         Vertex &v1 = triangle->v1;
1501                         Vertex &v2 = triangle->v2;
1502
                             // Triangles whose three vertices have any other combination of clip flags in common are skipped
1503                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1504                         {
1505                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1506
                                     // Union of the vertex flags and the draw call's own flags
1507                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1508
                                     // The clipper is only invoked when some flag beyond CLIP_FINITE is set
1509                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1510                                 {
1511                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1512                                         {
1513                                                 continue;
1514                                         }
1515                                 }
1516
1517                                 if(setupRoutine(primitive, triangle, &polygon, data))
1518                                 {
                                             // Each accepted triangle advances the output by state.multiSample entries
1519                                         primitive += ms;
1520                                         visible++;
1521                                 }
1522                         }
1523                 }
1524
1525                 return visible;
1526         }
1527
1528         int Renderer::setupWireframeTriangle(int unit, int count)
1529         {
                     // Turns the first triangle of triangleBatch[unit] into three lines
                     // (v0-v1, v1-v2, v2-v0) stored in triangle[0..2] and runs setupLine() on each.
                     // Returns the number of lines for which setupLine() returned true, or 0 when
                     // the triangle is culled. The 'count' parameter is not used in this function.
1530                 Triangle *triangle = triangleBatch[unit];
1531                 Primitive *primitive = primitiveBatch[unit];
1532                 int visible = 0;
1533
1534                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1535                 SetupProcessor::State &state = draw.setupState;
1536
1537                 const Vertex &v0 = triangle[0].v0;
1538                 const Vertex &v1 = triangle[0].v1;
1539                 const Vertex &v2 = triangle[0].v2;
1540
                     // Signed value computed from the x, y and w of the three vertices; its sign is
                     // tested against the cull mode below, and half of it is stored as the primitive's area
1541                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1542
1543                 if(state.cullMode == CULL_CLOCKWISE)
1544                 {
1545                         if(d >= 0) return 0;
1546                 }
1547                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1548                 {
1549                         if(d <= 0) return 0;
1550                 }
1551
                     // triangle[0] already holds the v0-v1 line; the other two lines are
                     // written to the next two batch entries
1552                 // Copy attributes
1553                 triangle[1].v0 = v1;
1554                 triangle[1].v1 = v2;
1555                 triangle[2].v0 = v2;
1556                 triangle[2].v1 = v0;
1557
                     // When flat shading is flagged, both endpoints of the second and third line
                     // take C[0] and C[1] from the first vertex of triangle[0].
                     // Only the flat flag of color[0][0] is consulted here.
1558                 if(state.color[0][0].flat)   // FIXME
1559                 {
1560                         for(int i = 0; i < 2; i++)
1561                         {
1562                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1563                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1564                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1565                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1566                         }
1567                 }
1568
1569                 for(int i = 0; i < 3; i++)
1570                 {
1571                         if(setupLine(*primitive, *triangle, draw))
1572                         {
                                     // Overwrite the area with the value derived from the whole triangle
1573                                 primitive->area = 0.5f * d;
1574
                                     // Advances by a single entry per accepted line
1575                                 primitive++;
1576                                 visible++;
1577                         }
1578
1579                         triangle++;
1580                 }
1581
1582                 return visible;
1583         }
1584
1585         int Renderer::setupVertexTriangle(int unit, int count)
1586         {
                     // Turns the first triangle of triangleBatch[unit] into three points (one per
                     // vertex, stored as v0 of triangle[0..2]) and runs setupPoint() on each.
                     // Returns the number of points for which setupPoint() returned true, or 0 when
                     // the triangle is culled. The 'count' parameter is not used in this function.
1587                 Triangle *triangle = triangleBatch[unit];
1588                 Primitive *primitive = primitiveBatch[unit];
1589                 int visible = 0;
1590
1591                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1592                 SetupProcessor::State &state = draw.setupState;
1593
1594                 const Vertex &v0 = triangle[0].v0;
1595                 const Vertex &v1 = triangle[0].v1;
1596                 const Vertex &v2 = triangle[0].v2;
1597
                     // Signed value computed from the x, y and w of the three vertices; its sign is
                     // tested against the cull mode below, and half of it is stored as the primitive's area
1598                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1599
1600                 if(state.cullMode == CULL_CLOCKWISE)
1601                 {
1602                         if(d >= 0) return 0;
1603                 }
1604                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1605                 {
1606                         if(d <= 0) return 0;
1607                 }
1608
                     // triangle[0].v0 already holds the first point; the second and third
                     // vertices become v0 of the next two batch entries
1609                 // Copy attributes
1610                 triangle[1].v0 = v1;
1611                 triangle[2].v0 = v2;
1612
1613                 for(int i = 0; i < 3; i++)
1614                 {
1615                         if(setupPoint(*primitive, *triangle, draw))
1616                         {
                                     // Overwrite the area with the value derived from the whole triangle
1617                                 primitive->area = 0.5f * d;
1618
                                     // Advances by a single entry per accepted point
1619                                 primitive++;
1620                                 visible++;
1621                         }
1622
1623                         triangle++;
1624                 }
1625
1626                 return visible;
1627         }
1628
1629         int Renderer::setupLines(int unit, int count)
1630         {
                     // Runs setupLine() on the first 'count' entries of triangleBatch[unit],
                     // writing the results to primitiveBatch[unit].
                     // Returns the number of entries for which setupLine() returned true.
1631                 Triangle *triangle = triangleBatch[unit];
1632                 Primitive *primitive = primitiveBatch[unit];
1633                 int visible = 0;
1634
1635                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1636                 SetupProcessor::State &state = draw.setupState;
1637
1638                 int ms = state.multiSample;
1639
1640                 for(int i = 0; i < count; i++)
1641                 {
1642                         if(setupLine(*primitive, *triangle, draw))
1643                         {
                                     // Each accepted line advances the output by state.multiSample entries
1644                                 primitive += ms;
1645                                 visible++;
1646                         }
1647
                             // The input entry advances whether or not the line was accepted
1648                         triangle++;
1649                 }
1650
1651                 return visible;
1652         }
1653
1654         int Renderer::setupPoints(int unit, int count)
1655         {
                     // Runs setupPoint() on the first 'count' entries of triangleBatch[unit],
                     // writing the results to primitiveBatch[unit].
                     // Returns the number of entries for which setupPoint() returned true.
1656                 Triangle *triangle = triangleBatch[unit];
1657                 Primitive *primitive = primitiveBatch[unit];
1658                 int visible = 0;
1659
1660                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1661                 SetupProcessor::State &state = draw.setupState;
1662
1663                 int ms = state.multiSample;
1664
1665                 for(int i = 0; i < count; i++)
1666                 {
1667                         if(setupPoint(*primitive, *triangle, draw))
1668                         {
                                     // Each accepted point advances the output by state.multiSample entries
1669                                 primitive += ms;
1670                                 visible++;
1671                         }
1672
                             // The input entry advances whether or not the point was accepted
1673                         triangle++;
1674                 }
1675
1676                 return visible;
1677         }
1678
1679         bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1680         {
                     // Builds a polygon around the line from triangle.v0 to triangle.v1 (taken from
                     // the position register), clips it when needed and hands it to the draw call's
                     // setup routine.
                     // Returns the setup routine's result, or false when the line is rejected earlier:
                     // both w values <= 0, zero length after projection, the AND of the corner clip
                     // flags differing from CLIP_FINITE, or the clipper returning false.
1681                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1682                 const SetupProcessor::State &state = draw.setupState;
1683                 const DrawData &data = *draw.data;
1684
1685                 float lineWidth = data.lineWidth;
1686
1687                 Vertex &v0 = triangle.v0;
1688                 Vertex &v1 = triangle.v1;
1689
1690                 int pos = state.positionRegister;
1691
1692                 const float4 &P0 = v0.v[pos];
1693                 const float4 &P1 = v1.v[pos];
1694
                     // NOTE(review): only the case where both w values are <= 0 is rejected; the
                     // divisions by w below still run when exactly one of them is <= 0 - confirm intended
1695                 if(P0.w <= 0 && P1.w <= 0)
1696                 {
1697                         return false;
1698                 }
1699
                     // Wx16[0] and Hx16[0] scaled down by 16 (presumably the viewport scale in pixels - confirm against DrawData)
1700                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1701                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1702
                     // Difference between the endpoints after division by w, scaled by W and H
1703                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1704                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1705
1706                 if(dx == 0 && dy == 0)
1707                 {
1708                         return false;
1709                 }
1710
                     // The first branch is disabled by the constant condition and never executes;
                     // it builds a 4-point polygon instead of the 6-point one used below
1711                 if(false)   // Rectangle
1712                 {
1713                         float4 P[4];
1714                         int C[4];
1715
1716                         P[0] = P0;
1717                         P[1] = P1;
1718                         P[2] = P1;
1719                         P[3] = P0;
1720
                             // Rescale (dx, dy) to a length of half the line width
1721                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1722
1723                         dx *= scale;
1724                         dy *= scale;
1725
1726                         float dx0w = dx * P0.w / W;
1727                         float dy0h = dy * P0.w / H;
1728                         float dx0h = dx * P0.w / H;
1729                         float dy0w = dy * P0.w / W;
1730
1731                         float dx1w = dx * P1.w / W;
1732                         float dy1h = dy * P1.w / H;
1733                         float dx1h = dx * P1.w / H;
1734                         float dy1w = dy * P1.w / W;
1735
1736                         P[0].x += -dy0w + -dx0w;
1737                         P[0].y += -dx0h + +dy0h;
1738                         C[0] = clipper->computeClipFlags(P[0]);
1739
1740                         P[1].x += -dy1w + +dx1w;
1741                         P[1].y += -dx1h + +dy1h;
1742                         C[1] = clipper->computeClipFlags(P[1]);
1743
1744                         P[2].x += +dy1w + +dx1w;
1745                         P[2].y += +dx1h + -dy1h;
1746                         C[2] = clipper->computeClipFlags(P[2]);
1747
1748                         P[3].x += +dy0w + -dx0w;
1749                         P[3].y += +dx0h + +dy0h;
1750                         C[3] = clipper->computeClipFlags(P[3]);
1751
1752                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1753                         {
1754                                 Polygon polygon(P, 4);
1755
1756                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1757
1758                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1759                                 {
1760                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1761                                         {
1762                                                 return false;
1763                                         }
1764                                 }
1765
1766                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1767                         }
1768                 }
1769                 else   // Diamond test convention
1770                 {
1771                         float4 P[8];
1772                         int C[8];
1773
                             // P[0..3] start as copies of the first endpoint, P[4..7] of the second
1774                         P[0] = P0;
1775                         P[1] = P0;
1776                         P[2] = P0;
1777                         P[3] = P0;
1778                         P[4] = P1;
1779                         P[5] = P1;
1780                         P[6] = P1;
1781                         P[7] = P1;
1782
                             // Half the line width, multiplied by w and divided by W (or H), so that the
                             // displacement equals lineWidth / 2 in the units used for dx and dy above
1783                         float dx0 = lineWidth * 0.5f * P0.w / W;
1784                         float dy0 = lineWidth * 0.5f * P0.w / H;
1785
1786                         float dx1 = lineWidth * 0.5f * P1.w / W;
1787                         float dy1 = lineWidth * 0.5f * P1.w / H;
1788
                             // Displace the four copies of each endpoint along -x, +y, +x and -y
                             // respectively, computing the clip flags of every displaced point
1789                         P[0].x += -dx0;
1790                         C[0] = clipper->computeClipFlags(P[0]);
1791
1792                         P[1].y += +dy0;
1793                         C[1] = clipper->computeClipFlags(P[1]);
1794
1795                         P[2].x += +dx0;
1796                         C[2] = clipper->computeClipFlags(P[2]);
1797
1798                         P[3].y += -dy0;
1799                         C[3] = clipper->computeClipFlags(P[3]);
1800
1801                         P[4].x += -dx1;
1802                         C[4] = clipper->computeClipFlags(P[4]);
1803
1804                         P[5].y += +dy1;
1805                         C[5] = clipper->computeClipFlags(P[5]);
1806
1807                         P[6].x += +dx1;
1808                         C[6] = clipper->computeClipFlags(P[6]);
1809
1810                         P[7].y += -dy1;
1811                         C[7] = clipper->computeClipFlags(P[7]);
1812
1813                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1814                         {
1815                                 float4 L[6];
1816
                                     // Pick six of the eight displaced points; which six depends on how
                                     // dx compares with -dy and with dy
1817                                 if(dx > -dy)
1818                                 {
1819                                         if(dx > dy)   // Right
1820                                         {
1821                                                 L[0] = P[0];
1822                                                 L[1] = P[1];
1823                                                 L[2] = P[5];
1824                                                 L[3] = P[6];
1825                                                 L[4] = P[7];
1826                                                 L[5] = P[3];
1827                                         }
1828                                         else   // Down
1829                                         {
1830                                                 L[0] = P[0];
1831                                                 L[1] = P[4];
1832                                                 L[2] = P[5];
1833                                                 L[3] = P[6];
1834                                                 L[4] = P[2];
1835                                                 L[5] = P[3];
1836                                         }
1837                                 }
1838                                 else
1839                                 {
1840                                         if(dx > dy)   // Up
1841                                         {
1842                                                 L[0] = P[0];
1843                                                 L[1] = P[1];
1844                                                 L[2] = P[2];
1845                                                 L[3] = P[6];
1846                                                 L[4] = P[7];
1847                                                 L[5] = P[4];
1848                                         }
1849                                         else   // Left
1850                                         {
1851                                                 L[0] = P[1];
1852                                                 L[1] = P[2];
1853                                                 L[2] = P[3];
1854                                                 L[3] = P[7];
1855                                                 L[4] = P[4];
1856                                                 L[5] = P[5];
1857                                         }
1858                                 }
1859
1860                                 Polygon polygon(L, 6);
1861
                                     // The flags of all eight points are combined, not only those of the six selected ones
1862                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1863
1864                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1865                                 {
1866                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1867                                         {
1868                                                 return false;
1869                                         }
1870                                 }
1871
1872                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1873                         }
1874                 }
1875
                     // Reached when the AND of the corner clip flags was not CLIP_FINITE
1876                 return false;
1877         }
1878
1879         bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1880         {
                     // Builds a four-corner polygon around triangle.v0, clips it when needed and
                     // hands it to the draw call's setup routine. Overwrites triangle.v1 and
                     // triangle.v2 with displaced copies of triangle.v0.
                     // Returns the setup routine's result, or false when the AND of the corner clip
                     // flags differs from CLIP_FINITE or the clipper returns false.
1881                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1882                 const SetupProcessor::State &state = draw.setupState;
1883                 const DrawData &data = *draw.data;
1884
1885                 Vertex &v = triangle.v0;
1886
1887                 float pSize;
1888
1889                 int pts = state.pointSizeRegister;
1890
                     // Point size comes from the y component of the point size register when one
                     // is in use, otherwise from the draw data
1891                 if(state.pointSizeRegister != Unused)
1892                 {
1893                         pSize = v.v[pts].y;
1894                 }
1895                 else
1896                 {
1897                         pSize = data.point.pointSize[0];
1898                 }
1899
1900                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1901
1902                 float4 P[4];
1903                 int C[4];
1904
1905                 int pos = state.positionRegister;
1906
                     // All four corners start at the vertex position
1907                 P[0] = v.v[pos];
1908                 P[1] = v.v[pos];
1909                 P[2] = v.v[pos];
1910                 P[3] = v.v[pos];
1911
                     // Corner displacement: point size times w times halfPixelX[0] / halfPixelY[0]
1912                 const float X = pSize * P[0].w * data.halfPixelX[0];
1913                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1914
                     // Corners in order: (-X, +Y), (+X, +Y), (+X, -Y), (-X, -Y)
1915                 P[0].x -= X;
1916                 P[0].y += Y;
1917                 C[0] = clipper->computeClipFlags(P[0]);
1918
1919                 P[1].x += X;
1920                 P[1].y += Y;
1921                 C[1] = clipper->computeClipFlags(P[1]);
1922
1923                 P[2].x += X;
1924                 P[2].y -= Y;
1925                 C[2] = clipper->computeClipFlags(P[2]);
1926
1927                 P[3].x -= X;
1928                 P[3].y -= Y;
1929                 C[3] = clipper->computeClipFlags(P[3]);
1930
                     // v1 and v2 become copies of v0 displaced by iround(8 * pSize) in X and Y
                     // respectively; the sign of the Y displacement follows the sign of Hx16[0]
1931                 triangle.v1 = triangle.v0;
1932                 triangle.v2 = triangle.v0;
1933
1934                 triangle.v1.X += iround(16 * 0.5f * pSize);
1935                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1936
                     // The polygon is constructed before the clip flag test below
1937                 Polygon polygon(P, 4);
1938
1939                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1940                 {
1941                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1942
1943                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1944                         {
1945                                 if(!clipper->clip(polygon, clipFlagsOr, draw))
1946                                 {
1947                                         return false;
1948                                 }
1949                         }
1950
1951                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1952                 }
1953
1954                 return false;
1955         }
1956
1957         void Renderer::initializeThreads()
1958         {
                     // Allocates the per-unit triangle and primitive batches and, for each of the
                     // threadCount workers, its vertex task, events and thread object.
                     // Both unitCount and clusterCount are set to ceilPow2(threadCount).
1959                 unitCount = ceilPow2(threadCount);
1960                 clusterCount = ceilPow2(threadCount);
1961
1962                 for(int i = 0; i < unitCount; i++)
1963                 {
1964                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1965                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1966                 }
1967
1968                 for(int i = 0; i < threadCount; i++)
1969                 {
1970                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1971                         vertexTask[i]->vertexCache.drawCall = -1;
1972
1973                         task[i].type = Task::SUSPEND;
1974
1975                         resume[i] = new Event();
1976                         suspend[i] = new Event();
1977
                             // 'parameters' is local to this iteration; only its address is passed to the thread
1978                         Parameters parameters;
1979                         parameters.threadIndex = i;
1980                         parameters.renderer = this;
1981
1982                         exitThreads = false;
1983                         worker[i] = new Thread(threadFunction, &parameters);
1984
                             // Wait on the suspend event, then signal it again.
                             // NOTE(review): presumably the wait ensures the worker has read 'parameters'
                             // before it goes out of scope - confirm against threadFunction
1985                         suspend[i]->wait();
1986                         suspend[i]->signal();
1987                 }
1988         }
1989
1990         void Renderer::terminateThreads()
1991         {
                     // Stops and deletes the worker threads and their events, then releases the
                     // vertex tasks and the triangle and primitive batches.
                     // Polls until threadsAwake reaches zero before touching any thread
1992                 while(threadsAwake != 0)
1993                 {
1994                         Thread::sleep(1);
1995                 }
1996
1997                 for(int thread = 0; thread < threadCount; thread++)
1998                 {
1999                         if(worker[thread])
2000                         {
                                     // Set the exit flag, signal the worker's resume event and wait for it to finish
2001                                 exitThreads = true;
2002                                 resume[thread]->signal();
2003                                 worker[thread]->join();
2004
2005                                 delete worker[thread];
2006                                 worker[thread] = 0;
2007                                 delete resume[thread];
2008                                 resume[thread] = 0;
2009                                 delete suspend[thread];
2010                                 suspend[thread] = 0;
2011                         }
2012
                             // The vertex task is released even when no worker object exists for this index
2013                         deallocate(vertexTask[thread]);
2014                         vertexTask[thread] = 0;
2015                 }
2016
                     // Releases a fixed 16 batch slots rather than unitCount
                     // (presumably the capacity of the batch arrays - confirm against Renderer.hpp)
2017                 for(int i = 0; i < 16; i++)
2018                 {
2019                         deallocate(triangleBatch[i]);
2020                         triangleBatch[i] = 0;
2021
2022                         deallocate(primitiveBatch[i]);
2023                         primitiveBatch[i] = 0;
2024                 }
2025         }
2026
2027         void Renderer::loadConstants(const VertexShader *vertexShader)
2028         {
                     // Scans the shader's instructions and, for every DEF, DEFI and DEFB opcode,
                     // sets the vertex shader constant at the destination index from the
                     // instruction's first source operand. Does nothing for a null shader.
2029                 if(!vertexShader) return;
2030
2031                 size_t count = vertexShader->getLength();
2032
2033                 for(size_t i = 0; i < count; i++)
2034                 {
2035                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2036
2037                         if(instruction->opcode == Shader::OPCODE_DEF)
2038                         {
                                     // Four-component float constant
2039                                 int index = instruction->dst.index;
2040                                 float value[4];
2041
2042                                 value[0] = instruction->src[0].value[0];
2043                                 value[1] = instruction->src[0].value[1];
2044                                 value[2] = instruction->src[0].value[2];
2045                                 value[3] = instruction->src[0].value[3];
2046
2047                                 setVertexShaderConstantF(index, value);
2048                         }
2049                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2050                         {
                                     // Four-component integer constant
2051                                 int index = instruction->dst.index;
2052                                 int integer[4];
2053
2054                                 integer[0] = instruction->src[0].integer[0];
2055                                 integer[1] = instruction->src[0].integer[1];
2056                                 integer[2] = instruction->src[0].integer[2];
2057                                 integer[3] = instruction->src[0].integer[3];
2058
2059                                 setVertexShaderConstantI(index, integer);
2060                         }
2061                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2062                         {
                                     // Boolean constant; only the first component of the operand is read
2063                                 int index = instruction->dst.index;
2064                                 int boolean = instruction->src[0].boolean[0];
2065
2066                                 setVertexShaderConstantB(index, &boolean);
2067                         }
2068                 }
2069         }
2070
2071         void Renderer::loadConstants(const PixelShader *pixelShader)
2072         {
2073                 if(!pixelShader) return;
2074
2075                 size_t count = pixelShader->getLength();
2076
2077                 for(size_t i = 0; i < count; i++)
2078                 {
2079                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2080
2081                         if(instruction->opcode == Shader::OPCODE_DEF)
2082                         {
2083                                 int index = instruction->dst.index;
2084                                 float value[4];
2085
2086                                 value[0] = instruction->src[0].value[0];
2087                                 value[1] = instruction->src[0].value[1];
2088                                 value[2] = instruction->src[0].value[2];
2089                                 value[3] = instruction->src[0].value[3];
2090
2091                                 setPixelShaderConstantF(index, value);
2092                         }
2093                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2094                         {
2095                                 int index = instruction->dst.index;
2096                                 int integer[4];
2097
2098                                 integer[0] = instruction->src[0].integer[0];
2099                                 integer[1] = instruction->src[0].integer[1];
2100                                 integer[2] = instruction->src[0].integer[2];
2101                                 integer[3] = instruction->src[0].integer[3];
2102
2103                                 setPixelShaderConstantI(index, integer);
2104                         }
2105                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2106                         {
2107                                 int index = instruction->dst.index;
2108                                 int boolean = instruction->src[0].boolean[0];
2109
2110                                 setPixelShaderConstantB(index, &boolean);
2111                         }
2112                 }
2113         }
2114
2115         void Renderer::setIndexBuffer(Resource *indexBuffer)
2116         {
2117                 context->indexBuffer = indexBuffer;
2118         }
2119
2120         void Renderer::setMultiSampleMask(unsigned int mask)
2121         {
2122                 context->sampleMask = mask;
2123         }
2124
2125         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2126         {
2127                 sw::transparencyAntialiasing = transparencyAntialiasing;
2128         }
2129
2130         bool Renderer::isReadWriteTexture(int sampler)
2131         {
2132                 for(int index = 0; index < RENDERTARGETS; index++)
2133                 {
2134                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2135                         {
2136                                 return true;
2137                         }
2138                 }
2139
2140                 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2141                 {
2142                         return true;
2143                 }
2144
2145                 return false;
2146         }
2147
2148         void Renderer::updateClipper()
2149         {
2150                 if(updateClipPlanes)
2151                 {
2152                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2153                         {
2154                                 const Matrix &scissorWorld = getViewTransform();
2155
2156                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2157                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2158                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2159                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2160                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2161                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2162                         }
2163                         else   // User plane in clip space
2164                         {
2165                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2166                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2167                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2168                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2169                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2170                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2171                         }
2172
2173                         updateClipPlanes = false;
2174                 }
2175         }
2176
2177         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2178         {
2179                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2180
2181                 context->texture[sampler] = resource;
2182         }
2183
2184         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2185         {
2186                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2187
2188                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2189         }
2190
2191         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2192         {
2193                 if(type == SAMPLER_PIXEL)
2194                 {
2195                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2196                 }
2197                 else
2198                 {
2199                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2200                 }
2201         }
2202
2203         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2204         {
2205                 if(type == SAMPLER_PIXEL)
2206                 {
2207                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2208                 }
2209                 else
2210                 {
2211                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2212                 }
2213         }
2214
2215         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2216         {
2217                 if(type == SAMPLER_PIXEL)
2218                 {
2219                         PixelProcessor::setGatherEnable(sampler, enable);
2220                 }
2221                 else
2222                 {
2223                         VertexProcessor::setGatherEnable(sampler, enable);
2224                 }
2225         }
2226
2227         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2228         {
2229                 if(type == SAMPLER_PIXEL)
2230                 {
2231                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2232                 }
2233                 else
2234                 {
2235                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2236                 }
2237         }
2238
2239         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2240         {
2241                 if(type == SAMPLER_PIXEL)
2242                 {
2243                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2244                 }
2245                 else
2246                 {
2247                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2248                 }
2249         }
2250
2251         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2252         {
2253                 if(type == SAMPLER_PIXEL)
2254                 {
2255                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2256                 }
2257                 else
2258                 {
2259                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2260                 }
2261         }
2262
2263         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2264         {
2265                 if(type == SAMPLER_PIXEL)
2266                 {
2267                         PixelProcessor::setReadSRGB(sampler, sRGB);
2268                 }
2269                 else
2270                 {
2271                         VertexProcessor::setReadSRGB(sampler, sRGB);
2272                 }
2273         }
2274
2275         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2276         {
2277                 if(type == SAMPLER_PIXEL)
2278                 {
2279                         PixelProcessor::setMipmapLOD(sampler, bias);
2280                 }
2281                 else
2282                 {
2283                         VertexProcessor::setMipmapLOD(sampler, bias);
2284                 }
2285         }
2286
2287         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2288         {
2289                 if(type == SAMPLER_PIXEL)
2290                 {
2291                         PixelProcessor::setBorderColor(sampler, borderColor);
2292                 }
2293                 else
2294                 {
2295                         VertexProcessor::setBorderColor(sampler, borderColor);
2296                 }
2297         }
2298
2299         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2300         {
2301                 if(type == SAMPLER_PIXEL)
2302                 {
2303                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2304                 }
2305                 else
2306                 {
2307                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2308                 }
2309         }
2310
2311         void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2312         {
2313                 if(type == SAMPLER_PIXEL)
2314                 {
2315                         PixelProcessor::setSwizzleR(sampler, swizzleR);
2316                 }
2317                 else
2318                 {
2319                         VertexProcessor::setSwizzleR(sampler, swizzleR);
2320                 }
2321         }
2322
2323         void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2324         {
2325                 if(type == SAMPLER_PIXEL)
2326                 {
2327                         PixelProcessor::setSwizzleG(sampler, swizzleG);
2328                 }
2329                 else
2330                 {
2331                         VertexProcessor::setSwizzleG(sampler, swizzleG);
2332                 }
2333         }
2334
2335         void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2336         {
2337                 if(type == SAMPLER_PIXEL)
2338                 {
2339                         PixelProcessor::setSwizzleB(sampler, swizzleB);
2340                 }
2341                 else
2342                 {
2343                         VertexProcessor::setSwizzleB(sampler, swizzleB);
2344                 }
2345         }
2346
2347         void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2348         {
2349                 if(type == SAMPLER_PIXEL)
2350                 {
2351                         PixelProcessor::setSwizzleA(sampler, swizzleA);
2352                 }
2353                 else
2354                 {
2355                         VertexProcessor::setSwizzleA(sampler, swizzleA);
2356                 }
2357         }
2358
2359         void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2360         {
2361                 if(type == SAMPLER_PIXEL)
2362                 {
2363                         PixelProcessor::setBaseLevel(sampler, baseLevel);
2364                 }
2365                 else
2366                 {
2367                         VertexProcessor::setBaseLevel(sampler, baseLevel);
2368                 }
2369         }
2370
2371         void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2372         {
2373                 if(type == SAMPLER_PIXEL)
2374                 {
2375                         PixelProcessor::setMaxLevel(sampler, maxLevel);
2376                 }
2377                 else
2378                 {
2379                         VertexProcessor::setMaxLevel(sampler, maxLevel);
2380                 }
2381         }
2382
2383         void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2384         {
2385                 if(type == SAMPLER_PIXEL)
2386                 {
2387                         PixelProcessor::setMinLod(sampler, minLod);
2388                 }
2389                 else
2390                 {
2391                         VertexProcessor::setMinLod(sampler, minLod);
2392                 }
2393         }
2394
2395         void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2396         {
2397                 if(type == SAMPLER_PIXEL)
2398                 {
2399                         PixelProcessor::setMaxLod(sampler, maxLod);
2400                 }
2401                 else
2402                 {
2403                         VertexProcessor::setMaxLod(sampler, maxLod);
2404                 }
2405         }
2406
2407         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2408         {
2409                 context->setPointSpriteEnable(pointSpriteEnable);
2410         }
2411
2412         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2413         {
2414                 context->setPointScaleEnable(pointScaleEnable);
2415         }
2416
2417         void Renderer::setLineWidth(float width)
2418         {
2419                 context->lineWidth = width;
2420         }
2421
2422         void Renderer::setDepthBias(float bias)
2423         {
2424                 depthBias = bias;
2425         }
2426
2427         void Renderer::setSlopeDepthBias(float slopeBias)
2428         {
2429                 slopeDepthBias = slopeBias;
2430         }
2431
2432         void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2433         {
2434                 context->rasterizerDiscard = rasterizerDiscard;
2435         }
2436
2437         void Renderer::setPixelShader(const PixelShader *shader)
2438         {
2439                 context->pixelShader = shader;
2440
2441                 loadConstants(shader);
2442         }
2443
2444         void Renderer::setVertexShader(const VertexShader *shader)
2445         {
2446                 context->vertexShader = shader;
2447
2448                 loadConstants(shader);
2449         }
2450
2451         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2452         {
2453                 for(int i = 0; i < DRAW_COUNT; i++)
2454                 {
2455                         if(drawCall[i]->psDirtyConstF < index + count)
2456                         {
2457                                 drawCall[i]->psDirtyConstF = index + count;
2458                         }
2459                 }
2460
2461                 for(int i = 0; i < count; i++)
2462                 {
2463                         PixelProcessor::setFloatConstant(index + i, value);
2464                         value += 4;
2465                 }
2466         }
2467
2468         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2469         {
2470                 for(int i = 0; i < DRAW_COUNT; i++)
2471                 {
2472                         if(drawCall[i]->psDirtyConstI < index + count)
2473                         {
2474                                 drawCall[i]->psDirtyConstI = index + count;
2475                         }
2476                 }
2477
2478                 for(int i = 0; i < count; i++)
2479                 {
2480                         PixelProcessor::setIntegerConstant(index + i, value);
2481                         value += 4;
2482                 }
2483         }
2484
2485         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2486         {
2487                 for(int i = 0; i < DRAW_COUNT; i++)
2488                 {
2489                         if(drawCall[i]->psDirtyConstB < index + count)
2490                         {
2491                                 drawCall[i]->psDirtyConstB = index + count;
2492                         }
2493                 }
2494
2495                 for(int i = 0; i < count; i++)
2496                 {
2497                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2498                         boolean++;
2499                 }
2500         }
2501
2502         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2503         {
2504                 for(int i = 0; i < DRAW_COUNT; i++)
2505                 {
2506                         if(drawCall[i]->vsDirtyConstF < index + count)
2507                         {
2508                                 drawCall[i]->vsDirtyConstF = index + count;
2509                         }
2510                 }
2511
2512                 for(int i = 0; i < count; i++)
2513                 {
2514                         VertexProcessor::setFloatConstant(index + i, value);
2515                         value += 4;
2516                 }
2517         }
2518
2519         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2520         {
2521                 for(int i = 0; i < DRAW_COUNT; i++)
2522                 {
2523                         if(drawCall[i]->vsDirtyConstI < index + count)
2524                         {
2525                                 drawCall[i]->vsDirtyConstI = index + count;
2526                         }
2527                 }
2528
2529                 for(int i = 0; i < count; i++)
2530                 {
2531                         VertexProcessor::setIntegerConstant(index + i, value);
2532                         value += 4;
2533                 }
2534         }
2535
2536         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2537         {
2538                 for(int i = 0; i < DRAW_COUNT; i++)
2539                 {
2540                         if(drawCall[i]->vsDirtyConstB < index + count)
2541                         {
2542                                 drawCall[i]->vsDirtyConstB = index + count;
2543                         }
2544                 }
2545
2546                 for(int i = 0; i < count; i++)
2547                 {
2548                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2549                         boolean++;
2550                 }
2551         }
2552
2553         void Renderer::setModelMatrix(const Matrix &M, int i)
2554         {
2555                 VertexProcessor::setModelMatrix(M, i);
2556         }
2557
2558         void Renderer::setViewMatrix(const Matrix &V)
2559         {
2560                 VertexProcessor::setViewMatrix(V);
2561                 updateClipPlanes = true;
2562         }
2563
2564         void Renderer::setBaseMatrix(const Matrix &B)
2565         {
2566                 VertexProcessor::setBaseMatrix(B);
2567                 updateClipPlanes = true;
2568         }
2569
2570         void Renderer::setProjectionMatrix(const Matrix &P)
2571         {
2572                 VertexProcessor::setProjectionMatrix(P);
2573                 updateClipPlanes = true;
2574         }
2575
2576         void Renderer::addQuery(Query *query)
2577         {
2578                 queries.push_back(query);
2579         }
2580
2581         void Renderer::removeQuery(Query *query)
2582         {
2583                 queries.remove(query);
2584         }
2585
2586         #if PERF_HUD
2587                 int Renderer::getThreadCount()
2588                 {
2589                         return threadCount;
2590                 }
2591
2592                 int64_t Renderer::getVertexTime(int thread)
2593                 {
2594                         return vertexTime[thread];
2595                 }
2596
2597                 int64_t Renderer::getSetupTime(int thread)
2598                 {
2599                         return setupTime[thread];
2600                 }
2601
2602                 int64_t Renderer::getPixelTime(int thread)
2603                 {
2604                         return pixelTime[thread];
2605                 }
2606
2607                 void Renderer::resetTimers()
2608                 {
2609                         for(int thread = 0; thread < threadCount; thread++)
2610                         {
2611                                 vertexTime[thread] = 0;
2612                                 setupTime[thread] = 0;
2613                                 pixelTime[thread] = 0;
2614                         }
2615                 }
2616         #endif
2617
2618         void Renderer::setViewport(const Viewport &viewport)
2619         {
2620                 this->viewport = viewport;
2621         }
2622
2623         void Renderer::setScissor(const Rect &scissor)
2624         {
2625                 this->scissor = scissor;
2626         }
2627
2628         void Renderer::setClipFlags(int flags)
2629         {
2630                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2631         }
2632
2633         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2634         {
2635                 if(index < MAX_CLIP_PLANES)
2636                 {
2637                         userPlane[index] = plane;
2638                 }
2639                 else ASSERT(false);
2640
2641                 updateClipPlanes = true;
2642         }
2643
2644         void Renderer::updateConfiguration(bool initialUpdate)
2645         {
2646                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2647
2648                 if(newConfiguration || initialUpdate)
2649                 {
2650                         terminateThreads();
2651
2652                         SwiftConfig::Configuration configuration = {};
2653                         swiftConfig->getConfiguration(configuration);
2654
2655                         precacheVertex = !newConfiguration && configuration.precache;
2656                         precacheSetup = !newConfiguration && configuration.precache;
2657                         precachePixel = !newConfiguration && configuration.precache;
2658
2659                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2660                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2661                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2662
2663                         switch(configuration.textureSampleQuality)
2664                         {
2665                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2666                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2667                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2668                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2669                         }
2670
2671                         switch(configuration.mipmapQuality)
2672                         {
2673                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2674                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2675                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2676                         }
2677
2678                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2679
2680                         switch(configuration.transcendentalPrecision)
2681                         {
2682                         case 0:
2683                                 logPrecision = APPROXIMATE;
2684                                 expPrecision = APPROXIMATE;
2685                                 rcpPrecision = APPROXIMATE;
2686                                 rsqPrecision = APPROXIMATE;
2687                                 break;
2688                         case 1:
2689                                 logPrecision = PARTIAL;
2690                                 expPrecision = PARTIAL;
2691                                 rcpPrecision = PARTIAL;
2692                                 rsqPrecision = PARTIAL;
2693                                 break;
2694                         case 2:
2695                                 logPrecision = ACCURATE;
2696                                 expPrecision = ACCURATE;
2697                                 rcpPrecision = ACCURATE;
2698                                 rsqPrecision = ACCURATE;
2699                                 break;
2700                         case 3:
2701                                 logPrecision = WHQL;
2702                                 expPrecision = WHQL;
2703                                 rcpPrecision = WHQL;
2704                                 rsqPrecision = WHQL;
2705                                 break;
2706                         case 4:
2707                                 logPrecision = IEEE;
2708                                 expPrecision = IEEE;
2709                                 rcpPrecision = IEEE;
2710                                 rsqPrecision = IEEE;
2711                                 break;
2712                         default:
2713                                 logPrecision = ACCURATE;
2714                                 expPrecision = ACCURATE;
2715                                 rcpPrecision = ACCURATE;
2716                                 rsqPrecision = ACCURATE;
2717                                 break;
2718                         }
2719
2720                         switch(configuration.transparencyAntialiasing)
2721                         {
2722                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2723                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2724                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2725                         }
2726
2727                         switch(configuration.threadCount)
2728                         {
2729                         case -1: threadCount = CPUID::coreCount();        break;
2730                         case 0:  threadCount = CPUID::processAffinity();  break;
2731                         default: threadCount = configuration.threadCount; break;
2732                         }
2733
2734                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2735                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2736                         CPUID::setEnableSSE3(configuration.enableSSE3);
2737                         CPUID::setEnableSSE2(configuration.enableSSE2);
2738                         CPUID::setEnableSSE(configuration.enableSSE);
2739
2740                         for(int pass = 0; pass < 10; pass++)
2741                         {
2742                                 optimization[pass] = configuration.optimization[pass];
2743                         }
2744
2745                         forceWindowed = configuration.forceWindowed;
2746                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2747                         postBlendSRGB = configuration.postBlendSRGB;
2748                         exactColorRounding = configuration.exactColorRounding;
2749                         forceClearRegisters = configuration.forceClearRegisters;
2750
2751                 #ifndef NDEBUG
2752                         minPrimitives = configuration.minPrimitives;
2753                         maxPrimitives = configuration.maxPrimitives;
2754                 #endif
2755                 }
2756
2757                 if(!initialUpdate && !worker[0])
2758                 {
2759                         initializeThreads();
2760                 }
2761         }
2762 }