OSDN Git Service

Make the number of vertex inputs configurable.
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Renderer.hpp"
16
17 #include "Clipper.hpp"
18 #include "Math.hpp"
19 #include "FrameBuffer.hpp"
20 #include "Timer.hpp"
21 #include "Surface.hpp"
22 #include "Half.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
27 #include "CPUID.hpp"
28 #include "Memory.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
31 #include "Debug.hpp"
32 #include "Reactor/Reactor.hpp"
33
34 #undef max
35
36 bool disableServer = true;   // Passed to the SwiftConfig constructor when a Renderer is created
37
38 #ifndef NDEBUG   // Debug-only limits checked at the top of Renderer::draw()
39 unsigned int minPrimitives = 1;         // draw() returns immediately when its count is below this
40 unsigned int maxPrimitives = 1 << 21;   // draw() returns immediately when its count is above this
41 #endif
42
43 namespace sw
44 {
45         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47         extern bool booleanFaceRegister;
48         extern bool fullPixelPositionRegister;
49         extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
50         extern bool secondaryColor;             // Specular lighting is applied after texturing
51         // The six flags above are assigned from the Conventions argument in the Renderer constructor
52         extern bool forceWindowed;
53         extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near
54         extern bool postBlendSRGB;
55         extern bool exactColorRounding;         // Assigned from the Renderer constructor argument of the same name
56         extern TransparencyAntialiasing transparencyAntialiasing;
57         extern bool forceClearRegisters;
58
59         extern bool precacheVertex;
60         extern bool precacheSetup;
61         extern bool precachePixel;
62
63         int batchSize = 128;     // Renderer::draw() divides this by the multisample count to get the per-draw batch size
64         int threadCount = 1;     // When greater than 1, draw() wakes a worker thread; otherwise the calling thread runs the task loop
65         int unitCount = 1;
66         int clusterCount = 1;    // Number of per-cluster occlusion counters that draw() resets
67
68         TranscendentalPrecision logPrecision = ACCURATE;   // threadFunction() turns on flush-to-zero/denormals-are-zero when this is below IEEE
69         TranscendentalPrecision expPrecision = ACCURATE;
70         TranscendentalPrecision rcpPrecision = ACCURATE;
71         TranscendentalPrecision rsqPrecision = ACCURATE;
72         bool perspectiveCorrection = true;
73
74         struct Parameters   // Argument bundle that Renderer::threadFunction() receives through its void* parameter
75         {
76                 Renderer *renderer;   // Renderer whose threadLoop() the thread runs
77                 int threadIndex;      // Index forwarded to threadLoop()
78         };
79
80         DrawCall::DrawCall()   // Creates an unused draw call slot and allocates its DrawData
81         {
82                 queries = 0;
83                 // Non-zero dirty counts make Renderer::draw() copy the shader constants the first time this slot is used
84                 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
85                 vsDirtyConstI = 16;
86                 vsDirtyConstB = 16;
87
88                 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
89                 psDirtyConstI = 16;
90                 psDirtyConstB = 16;
91                 // -1 marks the slot as free; Renderer::draw() searches for this value when picking a slot
92                 references = -1;
93                 // DrawData is obtained from allocate() and released with deallocate() in the destructor
94                 data = (DrawData*)allocate(sizeof(DrawData));
95                 data->constants = &constants;
96         }
97
98         DrawCall::~DrawCall()   // Frees the query list (if any) and the DrawData allocated by the constructor
99         {
100                 delete queries;   // May still be null (set to 0 in the constructor); deleting null is a no-op
101
102                 deallocate(data);   // Pairs with allocate() in the constructor
103         }
104
105         Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
106         {   // Publishes the conventions as sw:: globals, resets scheduler state, allocates the draw call pool and applies the initial configuration
107                 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
108                 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
109                 sw::booleanFaceRegister = conventions.booleanFaceRegister;
110                 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
111                 sw::leadingVertexFirst = conventions.leadingVertexFirst;
112                 sw::secondaryColor = conventions.secondaryColor;
113                 sw::exactColorRounding = exactColorRounding;
114
115                 setRenderTarget(0, 0);
116                 clipper = new Clipper();   // Deleted in the destructor
117                 // Flag the cached matrices and clip planes as needing an update
118                 updateViewMatrix = true;
119                 updateBaseMatrix = true;
120                 updateProjectionMatrix = true;
121                 updateClipPlanes = true;
122
123                 #if PERF_HUD
124                         resetTimers();
125                 #endif
126                 // Null out the 16 per-thread slots
127                 for(int i = 0; i < 16; i++)
128                 {
129                         vertexTask[i] = 0;
130
131                         worker[i] = 0;
132                         resume[i] = 0;
133                         suspend[i] = 0;
134                 }
135
136                 threadsAwake = 0;
137                 resumeApp = new Event();   // draw() waits on this event when no free DrawCall slot is available
138                 // Draw scheduling counters start at zero
139                 currentDraw = 0;
140                 nextDraw = 0;
141
142                 qHead = 0;
143                 qSize = 0;
144
145                 for(int i = 0; i < 16; i++)
146                 {
147                         triangleBatch[i] = 0;
148                         primitiveBatch[i] = 0;
149                 }
150                 // Allocate the DRAW_COUNT draw call slots and seed drawList with them
151                 for(int draw = 0; draw < DRAW_COUNT; draw++)
152                 {
153                         drawCall[draw] = new DrawCall();
154                         drawList[draw] = drawCall[draw];
155                 }
156
157                 for(int unit = 0; unit < 16; unit++)
158                 {
159                         primitiveProgress[unit].init();
160                 }
161
162                 for(int cluster = 0; cluster < 16; cluster++)
163                 {
164                         pixelProgress[cluster].init();
165                 }
166
167                 clipFlags = 0;
168                 // Create the configuration object and apply it immediately (argument true; presumably an "initial update" flag — confirm)
169                 swiftConfig = new SwiftConfig(disableServer);
170                 updateConfiguration(true);
171
172                 sync = new Resource(0);   // Locked with sw::PRIVATE at the start of every draw pass
173         }
174
175         Renderer::~Renderer()   // Releases the objects created by the constructor and calls terminateThreads()
176         {
177                 sync->destruct();   // The Resource is released through destruct() rather than delete
178
179                 delete clipper;
180                 clipper = 0;
181
182                 terminateThreads();
183                 delete resumeApp;
184                 // Free the draw call pool allocated in the constructor
185                 for(int draw = 0; draw < DRAW_COUNT; draw++)
186                 {
187                         delete drawCall[draw];
188                 }
189
190                 delete swiftConfig;
191         }
192
193         void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
194         {   // Thin wrapper: forwards all arguments unchanged to the blitter member
195                 blitter.clear(pixel, format, dest, dRect, rgbaMask);
196         }
197
198         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
199         {   // Thin wrapper: forwards all arguments unchanged to the blitter member
200                 blitter.blit(source, sRect, dest, dRect, filter);
201         }
202
203         void Renderer::blit3D(Surface *source, Surface *dest)
204         {   // Thin wrapper: forwards both surfaces unchanged to the blitter member
205                 blitter.blit3D(source, dest);
206         }
207
208         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
209         {   // For each super-sample pass: snapshots the current context state into a free DrawCall slot, locks the resources it uses, and hands it to the scheduler
210                 #ifndef NDEBUG   // Debug builds only: ignore draws whose count lies outside [minPrimitives, maxPrimitives]
211                         if(count < minPrimitives || count > maxPrimitives)
212                         {
213                                 return;
214                         }
215                 #endif
216                 // Store the draw type on the context, then refresh configuration and clipper state
217                 context->drawType = drawType;
218
219                 updateConfiguration();
220                 updateClipper();
221                 // The loop below runs once per super-sample; ms is the context's multisample count
222                 int ss = context->getSuperSampleCount();
223                 int ms = context->getMultiSampleCount();
224
225                 for(int q = 0; q < ss; q++)
226                 {
227                         unsigned int oldMultiSampleMask = context->multiSampleMask;
228                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));   // Low ms bits of sampleMask for pass q. NOTE(review): assumes 1 <= ms <= 32 — confirm
229                         // Skip this pass when none of its samples is enabled
230                         if(!context->multiSampleMask)
231                         {
232                                 continue;
233                         }
234
235                         sync->lock(sw::PRIVATE);   // NOTE(review): the matching unlock is not in this function — confirm where it is released
236                         // NOTE(review): these locals are assigned only inside the branch below but used unconditionally at bind(); confirm callers never reach here with update == false and an unchanged mask
237                         Routine *vertexRoutine;
238                         Routine *setupRoutine;
239                         Routine *pixelRoutine;
240                         // Recompute processor states and fetch their routines when requested or when the sample mask changed
241                         if(update || oldMultiSampleMask != context->multiSampleMask)
242                         {
243                                 vertexState = VertexProcessor::update();
244                                 setupState = SetupProcessor::update();
245                                 pixelState = PixelProcessor::update();
246
247                                 vertexRoutine = VertexProcessor::routine(vertexState);
248                                 setupRoutine = SetupProcessor::routine(setupState);
249                                 pixelRoutine = PixelProcessor::routine(pixelState);
250                         }
251                         // Per-draw batch size is the global batchSize divided by the multisample count
252                         int batch = batchSize / ms;
253                         // Pick the setup function matching the primitive type and fill mode
254                         int (*setupPrimitives)(Renderer *renderer, int batch, int count);
255
256                         if(context->isDrawTriangle())
257                         {
258                                 switch(context->fillMode)
259                                 {
260                                 case FILL_SOLID:
261                                         setupPrimitives = setupSolidTriangles;
262                                         break;
263                                 case FILL_WIREFRAME:
264                                         setupPrimitives = setupWireframeTriangle;
265                                         batch = 1;   // Wireframe triangles use a batch size of one
266                                         break;
267                                 case FILL_VERTEX:
268                                         setupPrimitives = setupVertexTriangle;
269                                         batch = 1;   // Vertex-mode triangles use a batch size of one
270                                         break;
271                                 default: ASSERT(false);
272                                 }
273                         }
274                         else if(context->isDrawLine())
275                         {
276                                 setupPrimitives = setupLines;
277                         }
278                         else   // Point draw
279                         {
280                                 setupPrimitives = setupPoints;
281                         }
282                         // Find a free DrawCall slot (references == -1); wait on resumeApp and retry until one is available
283                         DrawCall *draw = 0;
284
285                         do
286                         {
287                                 for(int i = 0; i < DRAW_COUNT; i++)
288                                 {
289                                         if(drawCall[i]->references == -1)
290                                         {
291                                                 draw = drawCall[i];
292                                                 drawList[nextDraw % DRAW_COUNT] = draw;   // drawList is indexed modulo DRAW_COUNT by the running draw counter
293
294                                                 break;
295                                         }
296                                 }
297
298                                 if(!draw)
299                                 {
300                                         resumeApp->wait();
301                                 }
302                         }
303                         while(!draw);
304
305                         DrawData *data = draw->data;
306                         // Attach the active queries to this draw, incrementing each attached query's reference count
307                         if(queries.size() != 0)
308                         {
309                                 draw->queries = new std::list<Query*>();
310                                 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
311                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
312                                 {
313                                         Query* q = *query;   // Note: shadows the pass index q of the enclosing loop within this block
314                                         if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
315                                         {
316                                                 atomicIncrement(&(q->reference));
317                                                 draw->queries->push_back(q);
318                                         }
319                                 }
320                         }
321
322                         draw->drawType = drawType;
323                         draw->batchSize = batch;
324                         // Bind the routines and cache them and their entry points in the draw call
325                         vertexRoutine->bind();
326                         setupRoutine->bind();
327                         pixelRoutine->bind();
328
329                         draw->vertexRoutine = vertexRoutine;
330                         draw->setupRoutine = setupRoutine;
331                         draw->pixelRoutine = pixelRoutine;
332                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
333                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
334                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
335                         draw->setupPrimitives = setupPrimitives;
336                         draw->setupState = setupState;
337                         // Capture every vertex input stream (buffer pointer and stride) and lock its resource when present
338                         for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
339                         {
340                                 draw->vertexStream[i] = context->input[i].resource;
341                                 data->input[i] = context->input[i].buffer;
342                                 data->stride[i] = context->input[i].stride;
343
344                                 if(draw->vertexStream[i])
345                                 {
346                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
347                                 }
348                         }
349                         // Lock the index buffer, if any, and offset the returned byte pointer by indexOffset
350                         if(context->indexBuffer)
351                         {
352                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
353                         }
354
355                         draw->indexBuffer = context->indexBuffer;
356                         // Clear all texture slots, then fill in the ones the current states actually sample
357                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
358                         {
359                                 draw->texture[sampler] = 0;
360                         }
361                         // Pixel samplers use slots [0, TEXTURE_IMAGE_UNITS); vertex samplers follow at TEXTURE_IMAGE_UNITS + n (see below)
362                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
363                         {
364                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
365                                 {
366                                         draw->texture[sampler] = context->texture[sampler];
367                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texture is both read and written, use the same read/write lock as render targets
368
369                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
370                                 }
371                         }
372                         // With a pixel shader: copy the first psDirtyConst* constants into the draw data, reset the counts, and lock uniform buffers
373                         if(context->pixelShader)
374                         {
375                                 if(draw->psDirtyConstF)
376                                 {
377                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));   // The cW copy is capped at 8 constants
378                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
379                                         draw->psDirtyConstF = 0;
380                                 }
381
382                                 if(draw->psDirtyConstI)
383                                 {
384                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
385                                         draw->psDirtyConstI = 0;
386                                 }
387
388                                 if(draw->psDirtyConstB)
389                                 {
390                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
391                                         draw->psDirtyConstB = 0;
392                                 }
393
394                                 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
395                         }
396                         else
397                         {
398                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
399                                 {
400                                         draw->pUniformBuffers[i] = nullptr;
401                                 }
402                         }
403                         // For pixel shader version <= 0x0104: copy texture stage uniforms, stopping at the first disabled stage unless a pixel shader is set
404                         if(context->pixelShaderVersion() <= 0x0104)
405                         {
406                                 for(int stage = 0; stage < 8; stage++)
407                                 {
408                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
409                                         {
410                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
411                                         }
412                                         else break;
413                                 }
414                         }
415                         // With a vertex shader: lock vertex textures (version >= 0x0300), copy dirty constants, lock uniform and transform feedback buffers
416                         if(context->vertexShader)
417                         {
418                                 if(context->vertexShader->getVersion() >= 0x0300)
419                                 {
420                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
421                                         {
422                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
423                                                 {
424                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
425                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
426
427                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
428                                                 }
429                                         }
430                                 }
431
432                                 if(draw->vsDirtyConstF)
433                                 {
434                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
435                                         draw->vsDirtyConstF = 0;
436                                 }
437
438                                 if(draw->vsDirtyConstI)
439                                 {
440                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
441                                         draw->vsDirtyConstI = 0;
442                                 }
443
444                                 if(draw->vsDirtyConstB)
445                                 {
446                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
447                                         draw->vsDirtyConstB = 0;
448                                 }
449
450                                 if(context->vertexShader->instanceIdDeclared)
451                                 {
452                                         data->instanceID = context->instanceID;
453                                 }
454
455                                 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
456                                 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
457                         }
458                         else
459                         {
460                                 data->ff = ff;   // presumably the fixed-function vertex state — confirm
461                                 // Reset the dirty counts to their constructor values so a later shader draw on this slot copies its constants again
462                                 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
463                                 draw->vsDirtyConstI = 16;
464                                 draw->vsDirtyConstB = 16;
465
466                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
467                                 {
468                                         draw->vUniformBuffers[i] = nullptr;
469                                 }
470
471                                 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
472                                 {
473                                         draw->transformFeedbackBuffers[i] = nullptr;
474                                 }
475                         }
476                         // Copy stencil, fog and point state only when the corresponding state flag is set
477                         if(pixelState.stencilActive)
478                         {
479                                 data->stencil[0] = stencil;
480                                 data->stencil[1] = stencilCCW;
481                         }
482
483                         if(pixelState.fogActive)
484                         {
485                                 data->fog = fog;
486                         }
487
488                         if(setupState.isDrawPoint)
489                         {
490                                 data->point = point;
491                         }
492
493                         data->lineWidth = context->lineWidth;
494
495                         data->factor = factor;
496                         // Alpha-to-coverage: derive per-sample thresholds spread around the normalised alpha reference (only 2 and 4 samples handled)
497                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
498                         {
499                                 float ref = context->alphaReference * (1.0f / 255.0f);
500                                 float margin = sw::min(ref, 1.0f - ref);
501
502                                 if(ms == 4)
503                                 {
504                                         data->a2c0 = replicate(ref - margin * 0.6f);
505                                         data->a2c1 = replicate(ref - margin * 0.2f);
506                                         data->a2c2 = replicate(ref + margin * 0.2f);
507                                         data->a2c3 = replicate(ref + margin * 0.6f);
508                                 }
509                                 else if(ms == 2)
510                                 {
511                                         data->a2c0 = replicate(ref - margin * 0.3f);
512                                         data->a2c1 = replicate(ref + margin * 0.3f);
513                                 }
514                                 else ASSERT(false);
515                         }
516                         // Zero the per-cluster occlusion counters when occlusion is enabled in the pixel state
517                         if(pixelState.occlusionEnabled)
518                         {
519                                 for(int cluster = 0; cluster < clusterCount; cluster++)
520                                 {
521                                         data->occlusion[cluster] = 0;
522                                 }
523                         }
524
525                         #if PERF_PROFILE
526                                 for(int cluster = 0; cluster < clusterCount; cluster++)
527                                 {
528                                         for(int i = 0; i < PERF_TIMERS; i++)
529                                         {
530                                                 data->cycles[i][cluster] = 0;
531                                         }
532                                 }
533                         #endif
534
535                         // Viewport
536                         {
537                                 float W = 0.5f * viewport.width;
538                                 float H = 0.5f * viewport.height;
539                                 float X0 = viewport.x0 + W;
540                                 float Y0 = viewport.y0 + H;
541                                 float N = viewport.minZ;
542                                 float F = viewport.maxZ;
543                                 float Z = F - N;
544                                 // Depth bias is added to the near value for triangle draws
545                                 if(context->isDrawTriangle(false))
546                                 {
547                                         N += depthBias;
548                                 }
549                                 // Complementary depth buffer: negate the range and use 1 - near
550                                 if(complementaryDepthBuffer)
551                                 {
552                                         Z = -Z;
553                                         N = 1 - N;
554                                 }
555
556                                 static const float X[5][16] =   // Fragment offsets
557                                 {
558                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
559                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
560                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
561                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
562                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
563                                 };
564
565                                 static const float Y[5][16] =   // Fragment offsets
566                                 {
567                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
568                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
569                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
570                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
571                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
572                                 };
573                                 // Table row s = log2(super-sample count); column q is the offset for the current pass
574                                 int s = sw::log2(ss);
575                                 // Viewport terms are stored scaled by 16 (presumably sub-pixel fixed point — confirm)
576                                 data->Wx16 = replicate(W * 16);
577                                 data->Hx16 = replicate(H * 16);
578                                 data->X0x16 = replicate(X0 * 16 - 8);
579                                 data->Y0x16 = replicate(Y0 * 16 - 8);
580                                 data->XXXX = replicate(X[s][q] / W);
581                                 data->YYYY = replicate(Y[s][q] / H);
582                                 data->halfPixelX = replicate(0.5f / W);
583                                 data->halfPixelY = replicate(0.5f / H);
584                                 data->viewportHeight = abs(viewport.height);
585                                 data->slopeDepthBias = slopeDepthBias;
586                                 data->depthRange = Z;
587                                 data->depthNear = N;
588                                 draw->clipFlags = clipFlags;
589                                 // Copy only the user clip planes whose flag bit is set
590                                 if(clipFlags)
591                                 {
592                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
593                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
594                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
595                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
596                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
597                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
598                                 }
599                         }
600
601                         // Target
602                         {
603                                 for(int index = 0; index < RENDERTARGETS; index++)
604                                 {
605                                         draw->renderTarget[index] = context->renderTarget[index];
606                                         // Lock for read/write with q * ms as the third lockInternal argument (presumably the slice for this pass — confirm)
607                                         if(draw->renderTarget[index])
608                                         {
609                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
610                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
611                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
612                                         }
613                                 }
614
615                                 draw->depthBuffer = context->depthBuffer;
616                                 draw->stencilBuffer = context->stencilBuffer;
617
618                                 if(draw->depthBuffer)
619                                 {
620                                         data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
621                                         data->depthPitchB = context->depthBuffer->getInternalPitchB();
622                                         data->depthSliceB = context->depthBuffer->getInternalSliceB();
623                                 }
624
625                                 if(draw->stencilBuffer)
626                                 {
627                                         data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(q * ms, MANAGED);
628                                         data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
629                                         data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
630                                 }
631                         }
632
633                         // Scissor
634                         {
635                                 data->scissorX0 = scissor.x0;
636                                 data->scissorX1 = scissor.x1;
637                                 data->scissorY0 = scissor.y0;
638                                 data->scissorY1 = scissor.y1;
639                         }
640
641                         draw->primitive = 0;
642                         draw->count = count;
643                         // One reference per batch: ceil(count / batch); a non-negative value also marks the slot as in use
644                         draw->references = (count + batch - 1) / batch;
645                         // Advance the draw counter under the scheduler mutex
646                         schedulerMutex.lock();
647                         nextDraw++;
648                         schedulerMutex.unlock();
649                         // Multi-threaded: wake worker 0 if no threads are awake. Single-threaded: run the task loop on the calling thread
650                         if(threadCount > 1)
651                         {
652                                 if(!threadsAwake)
653                                 {
654                                         suspend[0]->wait();
655
656                                         threadsAwake = 1;
657                                         task[0].type = Task::RESUME;
658
659                                         resume[0]->signal();
660                                 }
661                         }
662                         else   // Use main thread for draw execution
663                         {
664                                 threadsAwake = 1;
665                                 task[0].type = Task::RESUME;
666
667                                 taskLoop(0);
668                         }
669                 }
670         }
671
672         void Renderer::threadFunction(void *parameters)
673         {
674                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
675                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
676
677                 if(logPrecision < IEEE)
678                 {
679                         CPUID::setFlushToZero(true);
680                         CPUID::setDenormalsAreZero(true);
681                 }
682
683                 renderer->threadLoop(threadIndex);
684         }
685
686         void Renderer::threadLoop(int threadIndex)
687         {
688                 while(!exitThreads)
689                 {
690                         taskLoop(threadIndex);
691
692                         suspend[threadIndex]->signal();
693                         resume[threadIndex]->wait();
694                 }
695         }
696
697         void Renderer::taskLoop(int threadIndex)
698         {
699                 while(task[threadIndex].type != Task::SUSPEND)
700                 {
701                         scheduleTask(threadIndex);
702                         executeTask(threadIndex);
703                 }
704         }
705
706         void Renderer::findAvailableTasks()
707         {
708                 // Find pixel tasks
709                 for(int cluster = 0; cluster < clusterCount; cluster++)
710                 {
711                         if(!pixelProgress[cluster].executing)
712                         {
713                                 for(int unit = 0; unit < unitCount; unit++)
714                                 {
715                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
716                                         {
717                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
718                                                 {
719                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
720                                                         {
721                                                                 Task &task = taskQueue[qHead];
722                                                                 task.type = Task::PIXELS;
723                                                                 task.primitiveUnit = unit;
724                                                                 task.pixelCluster = cluster;
725
726                                                                 pixelProgress[cluster].executing = true;
727
728                                                                 // Commit to the task queue
729                                                                 qHead = (qHead + 1) % 32;
730                                                                 qSize++;
731
732                                                                 break;
733                                                         }
734                                                 }
735                                         }
736                                 }
737                         }
738                 }
739
740                 // Find primitive tasks
741                 if(currentDraw == nextDraw)
742                 {
743                         return;   // No more primitives to process
744                 }
745
746                 for(int unit = 0; unit < unitCount; unit++)
747                 {
748                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
749
750                         if(draw->primitive >= draw->count)
751                         {
752                                 currentDraw++;
753
754                                 if(currentDraw == nextDraw)
755                                 {
756                                         return;   // No more primitives to process
757                                 }
758
759                                 draw = drawList[currentDraw % DRAW_COUNT];
760                         }
761
762                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
763                         {
764                                 int primitive = draw->primitive;
765                                 int count = draw->count;
766                                 int batch = draw->batchSize;
767
768                                 primitiveProgress[unit].drawCall = currentDraw;
769                                 primitiveProgress[unit].firstPrimitive = primitive;
770                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
771
772                                 draw->primitive += batch;
773
774                                 Task &task = taskQueue[qHead];
775                                 task.type = Task::PRIMITIVES;
776                                 task.primitiveUnit = unit;
777
778                                 primitiveProgress[unit].references = -1;
779
780                                 // Commit to the task queue
781                                 qHead = (qHead + 1) % 32;
782                                 qSize++;
783                         }
784                 }
785         }
786
787         void Renderer::scheduleTask(int threadIndex)
788         {
789                 schedulerMutex.lock();
790
791                 if((int)qSize < threadCount - threadsAwake + 1)
792                 {
793                         findAvailableTasks();
794                 }
795
796                 if(qSize != 0)
797                 {
798                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
799                         qSize--;
800
801                         if(threadsAwake != threadCount)
802                         {
803                                 int wakeup = qSize - threadsAwake + 1;
804
805                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
806                                 {
807                                         if(task[i].type == Task::SUSPEND)
808                                         {
809                                                 suspend[i]->wait();
810                                                 task[i].type = Task::RESUME;
811                                                 resume[i]->signal();
812
813                                                 threadsAwake++;
814                                                 wakeup--;
815                                         }
816                                 }
817                         }
818                 }
819                 else
820                 {
821                         task[threadIndex].type = Task::SUSPEND;
822
823                         threadsAwake--;
824                 }
825
826                 schedulerMutex.unlock();
827         }
828
829         void Renderer::executeTask(int threadIndex)
830         {
831                 #if PERF_HUD
832                         int64_t startTick = Timer::ticks();
833                 #endif
834
835                 switch(task[threadIndex].type)
836                 {
837                 case Task::PRIMITIVES:
838                         {
839                                 int unit = task[threadIndex].primitiveUnit;
840
841                                 int input = primitiveProgress[unit].firstPrimitive;
842                                 int count = primitiveProgress[unit].primitiveCount;
843                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
844                                 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
845
846                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
847
848                                 #if PERF_HUD
849                                         int64_t time = Timer::ticks();
850                                         vertexTime[threadIndex] += time - startTick;
851                                         startTick = time;
852                                 #endif
853
854                                 int visible = draw->setupState.rasterizerDiscard ? 0 : setupPrimitives(this, unit, count);
855
856                                 primitiveProgress[unit].visible = visible;
857                                 primitiveProgress[unit].references = clusterCount;
858
859                                 #if PERF_HUD
860                                         setupTime[threadIndex] += Timer::ticks() - startTick;
861                                 #endif
862                         }
863                         break;
864                 case Task::PIXELS:
865                         {
866                                 int unit = task[threadIndex].primitiveUnit;
867                                 int visible = primitiveProgress[unit].visible;
868
869                                 if(visible > 0)
870                                 {
871                                         int cluster = task[threadIndex].pixelCluster;
872                                         Primitive *primitive = primitiveBatch[unit];
873                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
874                                         DrawData *data = draw->data;
875                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
876
877                                         pixelRoutine(primitive, visible, cluster, data);
878                                 }
879
880                                 finishRendering(task[threadIndex]);
881
882                                 #if PERF_HUD
883                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
884                                 #endif
885                         }
886                         break;
887                 case Task::RESUME:
888                         break;
889                 case Task::SUSPEND:
890                         break;
891                 default:
892                         ASSERT(false);
893                 }
894         }
895
896         void Renderer::synchronize()
897         {
898                 sync->lock(sw::PUBLIC);
899                 sync->unlock();
900         }
901
902         void Renderer::finishRendering(Task &pixelTask)
903         {
904                 int unit = pixelTask.primitiveUnit;
905                 int cluster = pixelTask.pixelCluster;
906
907                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
908                 DrawData &data = *draw.data;
909                 int primitive = primitiveProgress[unit].firstPrimitive;
910                 int count = primitiveProgress[unit].primitiveCount;
911                 int processedPrimitives = primitive + count;
912
913                 pixelProgress[cluster].processedPrimitives = processedPrimitives;
914
915                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
916                 {
917                         pixelProgress[cluster].drawCall++;
918                         pixelProgress[cluster].processedPrimitives = 0;
919                 }
920
921                 int ref = atomicDecrement(&primitiveProgress[unit].references);
922
923                 if(ref == 0)
924                 {
925                         ref = atomicDecrement(&draw.references);
926
927                         if(ref == 0)
928                         {
929                                 #if PERF_PROFILE
930                                         for(int cluster = 0; cluster < clusterCount; cluster++)
931                                         {
932                                                 for(int i = 0; i < PERF_TIMERS; i++)
933                                                 {
934                                                         profiler.cycles[i] += data.cycles[i][cluster];
935                                                 }
936                                         }
937                                 #endif
938
939                                 if(draw.queries)
940                                 {
941                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
942                                         {
943                                                 Query *query = *q;
944
945                                                 switch(query->type)
946                                                 {
947                                                 case Query::FRAGMENTS_PASSED:
948                                                         for(int cluster = 0; cluster < clusterCount; cluster++)
949                                                         {
950                                                                 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
951                                                         }
952                                                         break;
953                                                 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
954                                                         atomicAdd((volatile int*)&query->data, processedPrimitives);
955                                                         break;
956                                                 default:
957                                                         break;
958                                                 }
959
960                                                 atomicDecrement(&query->reference);
961                                         }
962
963                                         delete draw.queries;
964                                         draw.queries = 0;
965                                 }
966
967                                 for(int i = 0; i < RENDERTARGETS; i++)
968                                 {
969                                         if(draw.renderTarget[i])
970                                         {
971                                                 draw.renderTarget[i]->unlockInternal();
972                                         }
973                                 }
974
975                                 if(draw.depthBuffer)
976                                 {
977                                         draw.depthBuffer->unlockInternal();
978                                 }
979
980                                 if(draw.stencilBuffer)
981                                 {
982                                         draw.stencilBuffer->unlockStencil();
983                                 }
984
985                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
986                                 {
987                                         if(draw.texture[i])
988                                         {
989                                                 draw.texture[i]->unlock();
990                                         }
991                                 }
992
993                                 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
994                                 {
995                                         if(draw.vertexStream[i])
996                                         {
997                                                 draw.vertexStream[i]->unlock();
998                                         }
999                                 }
1000
1001                                 if(draw.indexBuffer)
1002                                 {
1003                                         draw.indexBuffer->unlock();
1004                                 }
1005
1006                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1007                                 {
1008                                         if(draw.pUniformBuffers[i])
1009                                         {
1010                                                 draw.pUniformBuffers[i]->unlock();
1011                                         }
1012                                         if(draw.vUniformBuffers[i])
1013                                         {
1014                                                 draw.vUniformBuffers[i]->unlock();
1015                                         }
1016                                 }
1017
1018                                 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1019                                 {
1020                                         if(draw.transformFeedbackBuffers[i])
1021                                         {
1022                                                 draw.transformFeedbackBuffers[i]->unlock();
1023                                         }
1024                                 }
1025
1026                                 draw.vertexRoutine->unbind();
1027                                 draw.setupRoutine->unbind();
1028                                 draw.pixelRoutine->unbind();
1029
1030                                 sync->unlock();
1031
1032                                 draw.references = -1;
1033                                 resumeApp->signal();
1034                         }
1035                 }
1036
1037                 pixelProgress[cluster].executing = false;
1038         }
1039
1040         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1041         {
1042                 Triangle *triangle = triangleBatch[unit];
1043                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1044                 DrawData *data = draw->data;
1045                 VertexTask *task = vertexTask[thread];
1046
1047                 const void *indices = data->indices;
1048                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1049
1050                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1051                 {
1052                         task->vertexCache.clear();
1053                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1054                 }
1055
1056                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1057
1058                 switch(draw->drawType)
1059                 {
1060                 case DRAW_POINTLIST:
1061                         {
1062                                 unsigned int index = start;
1063
1064                                 for(unsigned int i = 0; i < triangleCount; i++)
1065                                 {
1066                                         batch[i][0] = index;
1067                                         batch[i][1] = index;
1068                                         batch[i][2] = index;
1069
1070                                         index += 1;
1071                                 }
1072                         }
1073                         break;
1074                 case DRAW_LINELIST:
1075                         {
1076                                 unsigned int index = 2 * start;
1077
1078                                 for(unsigned int i = 0; i < triangleCount; i++)
1079                                 {
1080                                         batch[i][0] = index + 0;
1081                                         batch[i][1] = index + 1;
1082                                         batch[i][2] = index + 1;
1083
1084                                         index += 2;
1085                                 }
1086                         }
1087                         break;
1088                 case DRAW_LINESTRIP:
1089                         {
1090                                 unsigned int index = start;
1091
1092                                 for(unsigned int i = 0; i < triangleCount; i++)
1093                                 {
1094                                         batch[i][0] = index + 0;
1095                                         batch[i][1] = index + 1;
1096                                         batch[i][2] = index + 1;
1097
1098                                         index += 1;
1099                                 }
1100                         }
1101                         break;
1102                 case DRAW_LINELOOP:
1103                         {
1104                                 unsigned int index = start;
1105
1106                                 for(unsigned int i = 0; i < triangleCount; i++)
1107                                 {
1108                                         batch[i][0] = (index + 0) % loop;
1109                                         batch[i][1] = (index + 1) % loop;
1110                                         batch[i][2] = (index + 1) % loop;
1111
1112                                         index += 1;
1113                                 }
1114                         }
1115                         break;
1116                 case DRAW_TRIANGLELIST:
1117                         {
1118                                 unsigned int index = 3 * start;
1119
1120                                 for(unsigned int i = 0; i < triangleCount; i++)
1121                                 {
1122                                         batch[i][0] = index + 0;
1123                                         batch[i][1] = index + 1;
1124                                         batch[i][2] = index + 2;
1125
1126                                         index += 3;
1127                                 }
1128                         }
1129                         break;
1130                 case DRAW_TRIANGLESTRIP:
1131                         {
1132                                 unsigned int index = start;
1133
1134                                 for(unsigned int i = 0; i < triangleCount; i++)
1135                                 {
1136                                         batch[i][0] = index + 0;
1137                                         batch[i][1] = index + (index & 1) + 1;
1138                                         batch[i][2] = index + (~index & 1) + 1;
1139
1140                                         index += 1;
1141                                 }
1142                         }
1143                         break;
1144                 case DRAW_TRIANGLEFAN:
1145                         {
1146                                 unsigned int index = start;
1147
1148                                 for(unsigned int i = 0; i < triangleCount; i++)
1149                                 {
1150                                         batch[i][0] = index + 1;
1151                                         batch[i][1] = index + 2;
1152                                         batch[i][2] = 0;
1153
1154                                         index += 1;
1155                                 }
1156                         }
1157                         break;
1158                 case DRAW_INDEXEDPOINTLIST8:
1159                         {
1160                                 const unsigned char *index = (const unsigned char*)indices + start;
1161
1162                                 for(unsigned int i = 0; i < triangleCount; i++)
1163                                 {
1164                                         batch[i][0] = *index;
1165                                         batch[i][1] = *index;
1166                                         batch[i][2] = *index;
1167
1168                                         index += 1;
1169                                 }
1170                         }
1171                         break;
1172                 case DRAW_INDEXEDPOINTLIST16:
1173                         {
1174                                 const unsigned short *index = (const unsigned short*)indices + start;
1175
1176                                 for(unsigned int i = 0; i < triangleCount; i++)
1177                                 {
1178                                         batch[i][0] = *index;
1179                                         batch[i][1] = *index;
1180                                         batch[i][2] = *index;
1181
1182                                         index += 1;
1183                                 }
1184                         }
1185                         break;
1186                 case DRAW_INDEXEDPOINTLIST32:
1187                         {
1188                                 const unsigned int *index = (const unsigned int*)indices + start;
1189
1190                                 for(unsigned int i = 0; i < triangleCount; i++)
1191                                 {
1192                                         batch[i][0] = *index;
1193                                         batch[i][1] = *index;
1194                                         batch[i][2] = *index;
1195
1196                                         index += 1;
1197                                 }
1198                         }
1199                         break;
1200                 case DRAW_INDEXEDLINELIST8:
1201                         {
1202                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1203
1204                                 for(unsigned int i = 0; i < triangleCount; i++)
1205                                 {
1206                                         batch[i][0] = index[0];
1207                                         batch[i][1] = index[1];
1208                                         batch[i][2] = index[1];
1209
1210                                         index += 2;
1211                                 }
1212                         }
1213                         break;
1214                 case DRAW_INDEXEDLINELIST16:
1215                         {
1216                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1217
1218                                 for(unsigned int i = 0; i < triangleCount; i++)
1219                                 {
1220                                         batch[i][0] = index[0];
1221                                         batch[i][1] = index[1];
1222                                         batch[i][2] = index[1];
1223
1224                                         index += 2;
1225                                 }
1226                         }
1227                         break;
1228                 case DRAW_INDEXEDLINELIST32:
1229                         {
1230                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1231
1232                                 for(unsigned int i = 0; i < triangleCount; i++)
1233                                 {
1234                                         batch[i][0] = index[0];
1235                                         batch[i][1] = index[1];
1236                                         batch[i][2] = index[1];
1237
1238                                         index += 2;
1239                                 }
1240                         }
1241                         break;
1242                 case DRAW_INDEXEDLINESTRIP8:
1243                         {
1244                                 const unsigned char *index = (const unsigned char*)indices + start;
1245
1246                                 for(unsigned int i = 0; i < triangleCount; i++)
1247                                 {
1248                                         batch[i][0] = index[0];
1249                                         batch[i][1] = index[1];
1250                                         batch[i][2] = index[1];
1251
1252                                         index += 1;
1253                                 }
1254                         }
1255                         break;
1256                 case DRAW_INDEXEDLINESTRIP16:
1257                         {
1258                                 const unsigned short *index = (const unsigned short*)indices + start;
1259
1260                                 for(unsigned int i = 0; i < triangleCount; i++)
1261                                 {
1262                                         batch[i][0] = index[0];
1263                                         batch[i][1] = index[1];
1264                                         batch[i][2] = index[1];
1265
1266                                         index += 1;
1267                                 }
1268                         }
1269                         break;
1270                 case DRAW_INDEXEDLINESTRIP32:
1271                         {
1272                                 const unsigned int *index = (const unsigned int*)indices + start;
1273
1274                                 for(unsigned int i = 0; i < triangleCount; i++)
1275                                 {
1276                                         batch[i][0] = index[0];
1277                                         batch[i][1] = index[1];
1278                                         batch[i][2] = index[1];
1279
1280                                         index += 1;
1281                                 }
1282                         }
1283                         break;
1284                 case DRAW_INDEXEDLINELOOP8:
1285                         {
1286                                 const unsigned char *index = (const unsigned char*)indices;
1287
1288                                 for(unsigned int i = 0; i < triangleCount; i++)
1289                                 {
1290                                         batch[i][0] = index[(start + i + 0) % loop];
1291                                         batch[i][1] = index[(start + i + 1) % loop];
1292                                         batch[i][2] = index[(start + i + 1) % loop];
1293                                 }
1294                         }
1295                         break;
1296                 case DRAW_INDEXEDLINELOOP16:
1297                         {
1298                                 const unsigned short *index = (const unsigned short*)indices;
1299
1300                                 for(unsigned int i = 0; i < triangleCount; i++)
1301                                 {
1302                                         batch[i][0] = index[(start + i + 0) % loop];
1303                                         batch[i][1] = index[(start + i + 1) % loop];
1304                                         batch[i][2] = index[(start + i + 1) % loop];
1305                                 }
1306                         }
1307                         break;
1308                 case DRAW_INDEXEDLINELOOP32:
1309                         {
1310                                 const unsigned int *index = (const unsigned int*)indices;
1311
1312                                 for(unsigned int i = 0; i < triangleCount; i++)
1313                                 {
1314                                         batch[i][0] = index[(start + i + 0) % loop];
1315                                         batch[i][1] = index[(start + i + 1) % loop];
1316                                         batch[i][2] = index[(start + i + 1) % loop];
1317                                 }
1318                         }
1319                         break;
1320                 case DRAW_INDEXEDTRIANGLELIST8:
1321                         {
1322                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1323
1324                                 for(unsigned int i = 0; i < triangleCount; i++)
1325                                 {
1326                                         batch[i][0] = index[0];
1327                                         batch[i][1] = index[1];
1328                                         batch[i][2] = index[2];
1329
1330                                         index += 3;
1331                                 }
1332                         }
1333                         break;
1334                 case DRAW_INDEXEDTRIANGLELIST16:
1335                         {
1336                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1337
1338                                 for(unsigned int i = 0; i < triangleCount; i++)
1339                                 {
1340                                         batch[i][0] = index[0];
1341                                         batch[i][1] = index[1];
1342                                         batch[i][2] = index[2];
1343
1344                                         index += 3;
1345                                 }
1346                         }
1347                         break;
1348                 case DRAW_INDEXEDTRIANGLELIST32:
1349                         {
1350                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1351
1352                                 for(unsigned int i = 0; i < triangleCount; i++)
1353                                 {
1354                                         batch[i][0] = index[0];
1355                                         batch[i][1] = index[1];
1356                                         batch[i][2] = index[2];
1357
1358                                         index += 3;
1359                                 }
1360                         }
1361                         break;
1362                 case DRAW_INDEXEDTRIANGLESTRIP8:
1363                         {
1364                                 const unsigned char *index = (const unsigned char*)indices + start;
1365
1366                                 for(unsigned int i = 0; i < triangleCount; i++)
1367                                 {
1368                                         batch[i][0] = index[0];
1369                                         batch[i][1] = index[((start + i) & 1) + 1];
1370                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1371
1372                                         index += 1;
1373                                 }
1374                         }
1375                         break;
1376                 case DRAW_INDEXEDTRIANGLESTRIP16:
1377                         {
1378                                 const unsigned short *index = (const unsigned short*)indices + start;
1379
1380                                 for(unsigned int i = 0; i < triangleCount; i++)
1381                                 {
1382                                         batch[i][0] = index[0];
1383                                         batch[i][1] = index[((start + i) & 1) + 1];
1384                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1385
1386                                         index += 1;
1387                                 }
1388                         }
1389                         break;
1390                 case DRAW_INDEXEDTRIANGLESTRIP32:
1391                         {
1392                                 const unsigned int *index = (const unsigned int*)indices + start;
1393
1394                                 for(unsigned int i = 0; i < triangleCount; i++)
1395                                 {
1396                                         batch[i][0] = index[0];
1397                                         batch[i][1] = index[((start + i) & 1) + 1];
1398                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1399
1400                                         index += 1;
1401                                 }
1402                         }
1403                         break;
1404                 case DRAW_INDEXEDTRIANGLEFAN8:
1405                         {
1406                                 const unsigned char *index = (const unsigned char*)indices;
1407
1408                                 for(unsigned int i = 0; i < triangleCount; i++)
1409                                 {
1410                                         batch[i][0] = index[start + i + 1];
1411                                         batch[i][1] = index[start + i + 2];
1412                                         batch[i][2] = index[0];
1413                                 }
1414                         }
1415                         break;
1416                 case DRAW_INDEXEDTRIANGLEFAN16:
1417                         {
1418                                 const unsigned short *index = (const unsigned short*)indices;
1419
1420                                 for(unsigned int i = 0; i < triangleCount; i++)
1421                                 {
1422                                         batch[i][0] = index[start + i + 1];
1423                                         batch[i][1] = index[start + i + 2];
1424                                         batch[i][2] = index[0];
1425                                 }
1426                         }
1427                         break;
1428                 case DRAW_INDEXEDTRIANGLEFAN32:
1429                         {
1430                                 const unsigned int *index = (const unsigned int*)indices;
1431
1432                                 for(unsigned int i = 0; i < triangleCount; i++)
1433                                 {
1434                                         batch[i][0] = index[start + i + 1];
1435                                         batch[i][1] = index[start + i + 2];
1436                                         batch[i][2] = index[0];
1437                                 }
1438                         }
1439                         break;
1440                 case DRAW_QUADLIST:
1441                         {
1442                                 unsigned int index = 4 * start / 2;
1443
1444                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1445                                 {
1446                                         batch[i+0][0] = index + 0;
1447                                         batch[i+0][1] = index + 1;
1448                                         batch[i+0][2] = index + 2;
1449
1450                                         batch[i+1][0] = index + 0;
1451                                         batch[i+1][1] = index + 2;
1452                                         batch[i+1][2] = index + 3;
1453
1454                                         index += 4;
1455                                 }
1456                         }
1457                         break;
1458                 default:
1459                         ASSERT(false);
1460                         return;
1461                 }
1462
1463                 task->vertexStart = start * 3;
1464                 task->vertexCount = triangleCount * 3;
1465                 // Note: Quads aren't handled for verticesPerPrimitive, but verticesPerPrimitive is used for transform feedback,
1466                 //       which is an OpenGL ES 3.0 feature, and OpenGL ES 3.0 doesn't support quads as a primitive type.
1467                 DrawType type = static_cast<DrawType>(static_cast<unsigned int>(draw->drawType) & 0xF);
1468                 task->verticesPerPrimitive = 1 + (type >= DRAW_LINELIST) + (type >= DRAW_TRIANGLELIST);
1469                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1470         }
1471
1472         int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1473         {
1474                 Triangle *triangle = renderer->triangleBatch[unit];
1475                 Primitive *primitive = renderer->primitiveBatch[unit];
1476
1477                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1478                 SetupProcessor::State &state = draw.setupState;
1479                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1480
1481                 int ms = state.multiSample;
1482                 int pos = state.positionRegister;
1483                 const DrawData *data = draw.data;
1484                 int visible = 0;
1485
1486                 for(int i = 0; i < count; i++, triangle++)
1487                 {
1488                         Vertex &v0 = triangle->v0;
1489                         Vertex &v1 = triangle->v1;
1490                         Vertex &v2 = triangle->v2;
1491
1492                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1493                         {
1494                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1495
1496                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1497
1498                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1499                                 {
1500                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1501                                         {
1502                                                 continue;
1503                                         }
1504                                 }
1505
1506                                 if(setupRoutine(primitive, triangle, &polygon, data))
1507                                 {
1508                                         primitive += ms;
1509                                         visible++;
1510                                 }
1511                         }
1512                 }
1513
1514                 return visible;
1515         }
1516
1517         int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1518         {
1519                 Triangle *triangle = renderer->triangleBatch[unit];
1520                 Primitive *primitive = renderer->primitiveBatch[unit];
1521                 int visible = 0;
1522
1523                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1524                 SetupProcessor::State &state = draw.setupState;
1525                 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1526
1527                 const Vertex &v0 = triangle[0].v0;
1528                 const Vertex &v1 = triangle[0].v1;
1529                 const Vertex &v2 = triangle[0].v2;
1530
1531                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1532
1533                 if(state.cullMode == CULL_CLOCKWISE)
1534                 {
1535                         if(d >= 0) return 0;
1536                 }
1537                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1538                 {
1539                         if(d <= 0) return 0;
1540                 }
1541
1542                 // Copy attributes
1543                 triangle[1].v0 = v1;
1544                 triangle[1].v1 = v2;
1545                 triangle[2].v0 = v2;
1546                 triangle[2].v1 = v0;
1547
1548                 if(state.color[0][0].flat)   // FIXME
1549                 {
1550                         for(int i = 0; i < 2; i++)
1551                         {
1552                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1553                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1554                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1555                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1556                         }
1557                 }
1558
1559                 for(int i = 0; i < 3; i++)
1560                 {
1561                         if(setupLine(renderer, *primitive, *triangle, draw))
1562                         {
1563                                 primitive->area = 0.5f * d;
1564
1565                                 primitive++;
1566                                 visible++;
1567                         }
1568
1569                         triangle++;
1570                 }
1571
1572                 return visible;
1573         }
1574
1575         int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1576         {
1577                 Triangle *triangle = renderer->triangleBatch[unit];
1578                 Primitive *primitive = renderer->primitiveBatch[unit];
1579                 int visible = 0;
1580
1581                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1582                 SetupProcessor::State &state = draw.setupState;
1583
1584                 const Vertex &v0 = triangle[0].v0;
1585                 const Vertex &v1 = triangle[0].v1;
1586                 const Vertex &v2 = triangle[0].v2;
1587
1588                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1589
1590                 if(state.cullMode == CULL_CLOCKWISE)
1591                 {
1592                         if(d >= 0) return 0;
1593                 }
1594                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1595                 {
1596                         if(d <= 0) return 0;
1597                 }
1598
1599                 // Copy attributes
1600                 triangle[1].v0 = v1;
1601                 triangle[2].v0 = v2;
1602
1603                 for(int i = 0; i < 3; i++)
1604                 {
1605                         if(setupPoint(renderer, *primitive, *triangle, draw))
1606                         {
1607                                 primitive->area = 0.5f * d;
1608
1609                                 primitive++;
1610                                 visible++;
1611                         }
1612
1613                         triangle++;
1614                 }
1615
1616                 return visible;
1617         }
1618
1619         int Renderer::setupLines(Renderer *renderer, int unit, int count)
1620         {
1621                 Triangle *triangle = renderer->triangleBatch[unit];
1622                 Primitive *primitive = renderer->primitiveBatch[unit];
1623                 int visible = 0;
1624
1625                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1626                 SetupProcessor::State &state = draw.setupState;
1627
1628                 int ms = state.multiSample;
1629
1630                 for(int i = 0; i < count; i++)
1631                 {
1632                         if(setupLine(renderer, *primitive, *triangle, draw))
1633                         {
1634                                 primitive += ms;
1635                                 visible++;
1636                         }
1637
1638                         triangle++;
1639                 }
1640
1641                 return visible;
1642         }
1643
1644         int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1645         {
1646                 Triangle *triangle = renderer->triangleBatch[unit];
1647                 Primitive *primitive = renderer->primitiveBatch[unit];
1648                 int visible = 0;
1649
1650                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1651                 SetupProcessor::State &state = draw.setupState;
1652
1653                 int ms = state.multiSample;
1654
1655                 for(int i = 0; i < count; i++)
1656                 {
1657                         if(setupPoint(renderer, *primitive, *triangle, draw))
1658                         {
1659                                 primitive += ms;
1660                                 visible++;
1661                         }
1662
1663                         triangle++;
1664                 }
1665
1666                 return visible;
1667         }
1668
1669         bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1670         {
1671                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1672                 const SetupProcessor::State &state = draw.setupState;
1673                 const DrawData &data = *draw.data;
1674
1675                 float lineWidth = data.lineWidth;
1676
1677                 Vertex &v0 = triangle.v0;
1678                 Vertex &v1 = triangle.v1;
1679
1680                 int pos = state.positionRegister;
1681
1682                 const float4 &P0 = v0.v[pos];
1683                 const float4 &P1 = v1.v[pos];
1684
1685                 if(P0.w <= 0 && P1.w <= 0)
1686                 {
1687                         return false;
1688                 }
1689
1690                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1691                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1692
1693                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1694                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1695
1696                 if(dx == 0 && dy == 0)
1697                 {
1698                         return false;
1699                 }
1700
1701                 if(false)   // Rectangle
1702                 {
1703                         float4 P[4];
1704                         int C[4];
1705
1706                         P[0] = P0;
1707                         P[1] = P1;
1708                         P[2] = P1;
1709                         P[3] = P0;
1710
1711                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1712
1713                         dx *= scale;
1714                         dy *= scale;
1715
1716                         float dx0w = dx * P0.w / W;
1717                         float dy0h = dy * P0.w / H;
1718                         float dx0h = dx * P0.w / H;
1719                         float dy0w = dy * P0.w / W;
1720
1721                         float dx1w = dx * P1.w / W;
1722                         float dy1h = dy * P1.w / H;
1723                         float dx1h = dx * P1.w / H;
1724                         float dy1w = dy * P1.w / W;
1725
1726                         P[0].x += -dy0w + -dx0w;
1727                         P[0].y += -dx0h + +dy0h;
1728                         C[0] = computeClipFlags(P[0], data);
1729
1730                         P[1].x += -dy1w + +dx1w;
1731                         P[1].y += -dx1h + +dy1h;
1732                         C[1] = computeClipFlags(P[1], data);
1733
1734                         P[2].x += +dy1w + +dx1w;
1735                         P[2].y += +dx1h + -dy1h;
1736                         C[2] = computeClipFlags(P[2], data);
1737
1738                         P[3].x += +dy0w + -dx0w;
1739                         P[3].y += +dx0h + +dy0h;
1740                         C[3] = computeClipFlags(P[3], data);
1741
1742                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1743                         {
1744                                 Polygon polygon(P, 4);
1745
1746                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1747
1748                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1749                                 {
1750                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1751                                         {
1752                                                 return false;
1753                                         }
1754                                 }
1755
1756                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1757                         }
1758                 }
1759                 else   // Diamond test convention
1760                 {
1761                         float4 P[8];
1762                         int C[8];
1763
1764                         P[0] = P0;
1765                         P[1] = P0;
1766                         P[2] = P0;
1767                         P[3] = P0;
1768                         P[4] = P1;
1769                         P[5] = P1;
1770                         P[6] = P1;
1771                         P[7] = P1;
1772
1773                         float dx0 = lineWidth * 0.5f * P0.w / W;
1774                         float dy0 = lineWidth * 0.5f * P0.w / H;
1775
1776                         float dx1 = lineWidth * 0.5f * P1.w / W;
1777                         float dy1 = lineWidth * 0.5f * P1.w / H;
1778
1779                         P[0].x += -dx0;
1780                         C[0] = computeClipFlags(P[0], data);
1781
1782                         P[1].y += +dy0;
1783                         C[1] = computeClipFlags(P[1], data);
1784
1785                         P[2].x += +dx0;
1786                         C[2] = computeClipFlags(P[2], data);
1787
1788                         P[3].y += -dy0;
1789                         C[3] = computeClipFlags(P[3], data);
1790
1791                         P[4].x += -dx1;
1792                         C[4] = computeClipFlags(P[4], data);
1793
1794                         P[5].y += +dy1;
1795                         C[5] = computeClipFlags(P[5], data);
1796
1797                         P[6].x += +dx1;
1798                         C[6] = computeClipFlags(P[6], data);
1799
1800                         P[7].y += -dy1;
1801                         C[7] = computeClipFlags(P[7], data);
1802
1803                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1804                         {
1805                                 float4 L[6];
1806
1807                                 if(dx > -dy)
1808                                 {
1809                                         if(dx > dy)   // Right
1810                                         {
1811                                                 L[0] = P[0];
1812                                                 L[1] = P[1];
1813                                                 L[2] = P[5];
1814                                                 L[3] = P[6];
1815                                                 L[4] = P[7];
1816                                                 L[5] = P[3];
1817                                         }
1818                                         else   // Down
1819                                         {
1820                                                 L[0] = P[0];
1821                                                 L[1] = P[4];
1822                                                 L[2] = P[5];
1823                                                 L[3] = P[6];
1824                                                 L[4] = P[2];
1825                                                 L[5] = P[3];
1826                                         }
1827                                 }
1828                                 else
1829                                 {
1830                                         if(dx > dy)   // Up
1831                                         {
1832                                                 L[0] = P[0];
1833                                                 L[1] = P[1];
1834                                                 L[2] = P[2];
1835                                                 L[3] = P[6];
1836                                                 L[4] = P[7];
1837                                                 L[5] = P[4];
1838                                         }
1839                                         else   // Left
1840                                         {
1841                                                 L[0] = P[1];
1842                                                 L[1] = P[2];
1843                                                 L[2] = P[3];
1844                                                 L[3] = P[7];
1845                                                 L[4] = P[4];
1846                                                 L[5] = P[5];
1847                                         }
1848                                 }
1849
1850                                 Polygon polygon(L, 6);
1851
1852                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1853
1854                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1855                                 {
1856                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1857                                         {
1858                                                 return false;
1859                                         }
1860                                 }
1861
1862                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1863                         }
1864                 }
1865
1866                 return false;
1867         }
1868
1869         bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1870         {
1871                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1872                 const SetupProcessor::State &state = draw.setupState;
1873                 const DrawData &data = *draw.data;
1874
1875                 Vertex &v = triangle.v0;
1876
1877                 float pSize;
1878
1879                 int pts = state.pointSizeRegister;
1880
1881                 if(state.pointSizeRegister != Unused)
1882                 {
1883                         pSize = v.v[pts].y;
1884                 }
1885                 else
1886                 {
1887                         pSize = data.point.pointSize[0];
1888                 }
1889
1890                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1891
1892                 float4 P[4];
1893                 int C[4];
1894
1895                 int pos = state.positionRegister;
1896
1897                 P[0] = v.v[pos];
1898                 P[1] = v.v[pos];
1899                 P[2] = v.v[pos];
1900                 P[3] = v.v[pos];
1901
1902                 const float X = pSize * P[0].w * data.halfPixelX[0];
1903                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1904
1905                 P[0].x -= X;
1906                 P[0].y += Y;
1907                 C[0] = computeClipFlags(P[0], data);
1908
1909                 P[1].x += X;
1910                 P[1].y += Y;
1911                 C[1] = computeClipFlags(P[1], data);
1912
1913                 P[2].x += X;
1914                 P[2].y -= Y;
1915                 C[2] = computeClipFlags(P[2], data);
1916
1917                 P[3].x -= X;
1918                 P[3].y -= Y;
1919                 C[3] = computeClipFlags(P[3], data);
1920
1921                 triangle.v1 = triangle.v0;
1922                 triangle.v2 = triangle.v0;
1923
1924                 triangle.v1.X += iround(16 * 0.5f * pSize);
1925                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1926
1927                 Polygon polygon(P, 4);
1928
1929                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1930                 {
1931                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1932
1933                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1934                         {
1935                                 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1936                                 {
1937                                         return false;
1938                                 }
1939                         }
1940
1941                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1942                 }
1943
1944                 return false;
1945         }
1946
1947         unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1948         {
1949                 return ((v.x > v.w)  << 0) |
1950                        ((v.y > v.w)  << 1) |
1951                        ((v.z > v.w)  << 2) |
1952                        ((v.x < -v.w) << 3) |
1953                        ((v.y < -v.w) << 4) |
1954                        ((v.z < 0)    << 5) |
1955                        Clipper::CLIP_FINITE;   // FIXME: xyz finite
1956         }
1957
1958         void Renderer::initializeThreads()
1959         {
1960                 unitCount = ceilPow2(threadCount);
1961                 clusterCount = ceilPow2(threadCount);
1962
1963                 for(int i = 0; i < unitCount; i++)
1964                 {
1965                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1966                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1967                 }
1968
1969                 for(int i = 0; i < threadCount; i++)
1970                 {
1971                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1972                         vertexTask[i]->vertexCache.drawCall = -1;
1973
1974                         task[i].type = Task::SUSPEND;
1975
1976                         resume[i] = new Event();
1977                         suspend[i] = new Event();
1978
1979                         Parameters parameters;
1980                         parameters.threadIndex = i;
1981                         parameters.renderer = this;
1982
1983                         exitThreads = false;
1984                         worker[i] = new Thread(threadFunction, &parameters);
1985
1986                         suspend[i]->wait();
1987                         suspend[i]->signal();
1988                 }
1989         }
1990
1991         void Renderer::terminateThreads()
1992         {
1993                 while(threadsAwake != 0)
1994                 {
1995                         Thread::sleep(1);
1996                 }
1997
1998                 for(int thread = 0; thread < threadCount; thread++)
1999                 {
2000                         if(worker[thread])
2001                         {
2002                                 exitThreads = true;
2003                                 resume[thread]->signal();
2004                                 worker[thread]->join();
2005
2006                                 delete worker[thread];
2007                                 worker[thread] = 0;
2008                                 delete resume[thread];
2009                                 resume[thread] = 0;
2010                                 delete suspend[thread];
2011                                 suspend[thread] = 0;
2012                         }
2013
2014                         deallocate(vertexTask[thread]);
2015                         vertexTask[thread] = 0;
2016                 }
2017
2018                 for(int i = 0; i < 16; i++)
2019                 {
2020                         deallocate(triangleBatch[i]);
2021                         triangleBatch[i] = 0;
2022
2023                         deallocate(primitiveBatch[i]);
2024                         primitiveBatch[i] = 0;
2025                 }
2026         }
2027
2028         void Renderer::loadConstants(const VertexShader *vertexShader)
2029         {
2030                 if(!vertexShader) return;
2031
2032                 size_t count = vertexShader->getLength();
2033
2034                 for(size_t i = 0; i < count; i++)
2035                 {
2036                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2037
2038                         if(instruction->opcode == Shader::OPCODE_DEF)
2039                         {
2040                                 int index = instruction->dst.index;
2041                                 float value[4];
2042
2043                                 value[0] = instruction->src[0].value[0];
2044                                 value[1] = instruction->src[0].value[1];
2045                                 value[2] = instruction->src[0].value[2];
2046                                 value[3] = instruction->src[0].value[3];
2047
2048                                 setVertexShaderConstantF(index, value);
2049                         }
2050                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2051                         {
2052                                 int index = instruction->dst.index;
2053                                 int integer[4];
2054
2055                                 integer[0] = instruction->src[0].integer[0];
2056                                 integer[1] = instruction->src[0].integer[1];
2057                                 integer[2] = instruction->src[0].integer[2];
2058                                 integer[3] = instruction->src[0].integer[3];
2059
2060                                 setVertexShaderConstantI(index, integer);
2061                         }
2062                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2063                         {
2064                                 int index = instruction->dst.index;
2065                                 int boolean = instruction->src[0].boolean[0];
2066
2067                                 setVertexShaderConstantB(index, &boolean);
2068                         }
2069                 }
2070         }
2071
2072         void Renderer::loadConstants(const PixelShader *pixelShader)
2073         {
2074                 if(!pixelShader) return;
2075
2076                 size_t count = pixelShader->getLength();
2077
2078                 for(size_t i = 0; i < count; i++)
2079                 {
2080                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2081
2082                         if(instruction->opcode == Shader::OPCODE_DEF)
2083                         {
2084                                 int index = instruction->dst.index;
2085                                 float value[4];
2086
2087                                 value[0] = instruction->src[0].value[0];
2088                                 value[1] = instruction->src[0].value[1];
2089                                 value[2] = instruction->src[0].value[2];
2090                                 value[3] = instruction->src[0].value[3];
2091
2092                                 setPixelShaderConstantF(index, value);
2093                         }
2094                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2095                         {
2096                                 int index = instruction->dst.index;
2097                                 int integer[4];
2098
2099                                 integer[0] = instruction->src[0].integer[0];
2100                                 integer[1] = instruction->src[0].integer[1];
2101                                 integer[2] = instruction->src[0].integer[2];
2102                                 integer[3] = instruction->src[0].integer[3];
2103
2104                                 setPixelShaderConstantI(index, integer);
2105                         }
2106                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2107                         {
2108                                 int index = instruction->dst.index;
2109                                 int boolean = instruction->src[0].boolean[0];
2110
2111                                 setPixelShaderConstantB(index, &boolean);
2112                         }
2113                 }
2114         }
2115
2116         void Renderer::setIndexBuffer(Resource *indexBuffer)
2117         {
2118                 context->indexBuffer = indexBuffer;
2119         }
2120
2121         void Renderer::setMultiSampleMask(unsigned int mask)
2122         {
2123                 context->sampleMask = mask;
2124         }
2125
2126         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2127         {
2128                 sw::transparencyAntialiasing = transparencyAntialiasing;
2129         }
2130
2131         bool Renderer::isReadWriteTexture(int sampler)
2132         {
2133                 for(int index = 0; index < RENDERTARGETS; index++)
2134                 {
2135                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2136                         {
2137                                 return true;
2138                         }
2139                 }
2140
2141                 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2142                 {
2143                         return true;
2144                 }
2145
2146                 return false;
2147         }
2148
2149         void Renderer::updateClipper()
2150         {
2151                 if(updateClipPlanes)
2152                 {
2153                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2154                         {
2155                                 const Matrix &scissorWorld = getViewTransform();
2156
2157                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2158                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2159                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2160                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2161                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2162                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2163                         }
2164                         else   // User plane in clip space
2165                         {
2166                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2167                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2168                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2169                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2170                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2171                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2172                         }
2173
2174                         updateClipPlanes = false;
2175                 }
2176         }
2177
2178         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2179         {
2180                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2181
2182                 context->texture[sampler] = resource;
2183         }
2184
2185         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2186         {
2187                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2188
2189                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2190         }
2191
2192         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2193         {
2194                 if(type == SAMPLER_PIXEL)
2195                 {
2196                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2197                 }
2198                 else
2199                 {
2200                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2201                 }
2202         }
2203
2204         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2205         {
2206                 if(type == SAMPLER_PIXEL)
2207                 {
2208                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2209                 }
2210                 else
2211                 {
2212                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2213                 }
2214         }
2215
2216         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2217         {
2218                 if(type == SAMPLER_PIXEL)
2219                 {
2220                         PixelProcessor::setGatherEnable(sampler, enable);
2221                 }
2222                 else
2223                 {
2224                         VertexProcessor::setGatherEnable(sampler, enable);
2225                 }
2226         }
2227
2228         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2229         {
2230                 if(type == SAMPLER_PIXEL)
2231                 {
2232                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2233                 }
2234                 else
2235                 {
2236                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2237                 }
2238         }
2239
2240         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2241         {
2242                 if(type == SAMPLER_PIXEL)
2243                 {
2244                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2245                 }
2246                 else
2247                 {
2248                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2249                 }
2250         }
2251
2252         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2253         {
2254                 if(type == SAMPLER_PIXEL)
2255                 {
2256                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2257                 }
2258                 else
2259                 {
2260                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2261                 }
2262         }
2263
2264         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2265         {
2266                 if(type == SAMPLER_PIXEL)
2267                 {
2268                         PixelProcessor::setReadSRGB(sampler, sRGB);
2269                 }
2270                 else
2271                 {
2272                         VertexProcessor::setReadSRGB(sampler, sRGB);
2273                 }
2274         }
2275
2276         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2277         {
2278                 if(type == SAMPLER_PIXEL)
2279                 {
2280                         PixelProcessor::setMipmapLOD(sampler, bias);
2281                 }
2282                 else
2283                 {
2284                         VertexProcessor::setMipmapLOD(sampler, bias);
2285                 }
2286         }
2287
2288         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2289         {
2290                 if(type == SAMPLER_PIXEL)
2291                 {
2292                         PixelProcessor::setBorderColor(sampler, borderColor);
2293                 }
2294                 else
2295                 {
2296                         VertexProcessor::setBorderColor(sampler, borderColor);
2297                 }
2298         }
2299
2300         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2301         {
2302                 if(type == SAMPLER_PIXEL)
2303                 {
2304                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2305                 }
2306                 else
2307                 {
2308                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2309                 }
2310         }
2311
2312         void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2313         {
2314                 if(type == SAMPLER_PIXEL)
2315                 {
2316                         PixelProcessor::setSwizzleR(sampler, swizzleR);
2317                 }
2318                 else
2319                 {
2320                         VertexProcessor::setSwizzleR(sampler, swizzleR);
2321                 }
2322         }
2323
2324         void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2325         {
2326                 if(type == SAMPLER_PIXEL)
2327                 {
2328                         PixelProcessor::setSwizzleG(sampler, swizzleG);
2329                 }
2330                 else
2331                 {
2332                         VertexProcessor::setSwizzleG(sampler, swizzleG);
2333                 }
2334         }
2335
2336         void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2337         {
2338                 if(type == SAMPLER_PIXEL)
2339                 {
2340                         PixelProcessor::setSwizzleB(sampler, swizzleB);
2341                 }
2342                 else
2343                 {
2344                         VertexProcessor::setSwizzleB(sampler, swizzleB);
2345                 }
2346         }
2347
2348         void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2349         {
2350                 if(type == SAMPLER_PIXEL)
2351                 {
2352                         PixelProcessor::setSwizzleA(sampler, swizzleA);
2353                 }
2354                 else
2355                 {
2356                         VertexProcessor::setSwizzleA(sampler, swizzleA);
2357                 }
2358         }
2359
2360         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2361         {
2362                 context->setPointSpriteEnable(pointSpriteEnable);
2363         }
2364
2365         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2366         {
2367                 context->setPointScaleEnable(pointScaleEnable);
2368         }
2369
2370         void Renderer::setLineWidth(float width)
2371         {
2372                 context->lineWidth = width;
2373         }
2374
2375         void Renderer::setDepthBias(float bias)
2376         {
2377                 depthBias = bias;
2378         }
2379
2380         void Renderer::setSlopeDepthBias(float slopeBias)
2381         {
2382                 slopeDepthBias = slopeBias;
2383         }
2384
2385         void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2386         {
2387                 context->rasterizerDiscard = rasterizerDiscard;
2388         }
2389
2390         void Renderer::setPixelShader(const PixelShader *shader)
2391         {
2392                 context->pixelShader = shader;
2393
2394                 loadConstants(shader);
2395         }
2396
2397         void Renderer::setVertexShader(const VertexShader *shader)
2398         {
2399                 context->vertexShader = shader;
2400
2401                 loadConstants(shader);
2402         }
2403
2404         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2405         {
2406                 for(int i = 0; i < DRAW_COUNT; i++)
2407                 {
2408                         if(drawCall[i]->psDirtyConstF < index + count)
2409                         {
2410                                 drawCall[i]->psDirtyConstF = index + count;
2411                         }
2412                 }
2413
2414                 for(int i = 0; i < count; i++)
2415                 {
2416                         PixelProcessor::setFloatConstant(index + i, value);
2417                         value += 4;
2418                 }
2419         }
2420
2421         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2422         {
2423                 for(int i = 0; i < DRAW_COUNT; i++)
2424                 {
2425                         if(drawCall[i]->psDirtyConstI < index + count)
2426                         {
2427                                 drawCall[i]->psDirtyConstI = index + count;
2428                         }
2429                 }
2430
2431                 for(int i = 0; i < count; i++)
2432                 {
2433                         PixelProcessor::setIntegerConstant(index + i, value);
2434                         value += 4;
2435                 }
2436         }
2437
2438         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2439         {
2440                 for(int i = 0; i < DRAW_COUNT; i++)
2441                 {
2442                         if(drawCall[i]->psDirtyConstB < index + count)
2443                         {
2444                                 drawCall[i]->psDirtyConstB = index + count;
2445                         }
2446                 }
2447
2448                 for(int i = 0; i < count; i++)
2449                 {
2450                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2451                         boolean++;
2452                 }
2453         }
2454
2455         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2456         {
2457                 for(int i = 0; i < DRAW_COUNT; i++)
2458                 {
2459                         if(drawCall[i]->vsDirtyConstF < index + count)
2460                         {
2461                                 drawCall[i]->vsDirtyConstF = index + count;
2462                         }
2463                 }
2464
2465                 for(int i = 0; i < count; i++)
2466                 {
2467                         VertexProcessor::setFloatConstant(index + i, value);
2468                         value += 4;
2469                 }
2470         }
2471
2472         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2473         {
2474                 for(int i = 0; i < DRAW_COUNT; i++)
2475                 {
2476                         if(drawCall[i]->vsDirtyConstI < index + count)
2477                         {
2478                                 drawCall[i]->vsDirtyConstI = index + count;
2479                         }
2480                 }
2481
2482                 for(int i = 0; i < count; i++)
2483                 {
2484                         VertexProcessor::setIntegerConstant(index + i, value);
2485                         value += 4;
2486                 }
2487         }
2488
2489         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2490         {
2491                 for(int i = 0; i < DRAW_COUNT; i++)
2492                 {
2493                         if(drawCall[i]->vsDirtyConstB < index + count)
2494                         {
2495                                 drawCall[i]->vsDirtyConstB = index + count;
2496                         }
2497                 }
2498
2499                 for(int i = 0; i < count; i++)
2500                 {
2501                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2502                         boolean++;
2503                 }
2504         }
2505
2506         void Renderer::setModelMatrix(const Matrix &M, int i)
2507         {
2508                 VertexProcessor::setModelMatrix(M, i);
2509         }
2510
2511         void Renderer::setViewMatrix(const Matrix &V)
2512         {
2513                 VertexProcessor::setViewMatrix(V);
2514                 updateClipPlanes = true;
2515         }
2516
2517         void Renderer::setBaseMatrix(const Matrix &B)
2518         {
2519                 VertexProcessor::setBaseMatrix(B);
2520                 updateClipPlanes = true;
2521         }
2522
2523         void Renderer::setProjectionMatrix(const Matrix &P)
2524         {
2525                 VertexProcessor::setProjectionMatrix(P);
2526                 updateClipPlanes = true;
2527         }
2528
2529         void Renderer::addQuery(Query *query)
2530         {
2531                 queries.push_back(query);
2532         }
2533
2534         void Renderer::removeQuery(Query *query)
2535         {
2536                 queries.remove(query);
2537         }
2538
2539         #if PERF_HUD
2540                 int Renderer::getThreadCount()
2541                 {
2542                         return threadCount;
2543                 }
2544
2545                 int64_t Renderer::getVertexTime(int thread)
2546                 {
2547                         return vertexTime[thread];
2548                 }
2549
2550                 int64_t Renderer::getSetupTime(int thread)
2551                 {
2552                         return setupTime[thread];
2553                 }
2554
2555                 int64_t Renderer::getPixelTime(int thread)
2556                 {
2557                         return pixelTime[thread];
2558                 }
2559
2560                 void Renderer::resetTimers()
2561                 {
2562                         for(int thread = 0; thread < threadCount; thread++)
2563                         {
2564                                 vertexTime[thread] = 0;
2565                                 setupTime[thread] = 0;
2566                                 pixelTime[thread] = 0;
2567                         }
2568                 }
2569         #endif
2570
2571         void Renderer::setViewport(const Viewport &viewport)
2572         {
2573                 this->viewport = viewport;
2574         }
2575
2576         void Renderer::setScissor(const Rect &scissor)
2577         {
2578                 this->scissor = scissor;
2579         }
2580
2581         void Renderer::setClipFlags(int flags)
2582         {
2583                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2584         }
2585
2586         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2587         {
2588                 if(index < MAX_CLIP_PLANES)
2589                 {
2590                         userPlane[index] = plane;
2591                 }
2592                 else ASSERT(false);
2593
2594                 updateClipPlanes = true;
2595         }
2596
2597         void Renderer::updateConfiguration(bool initialUpdate)
2598         {
2599                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2600
2601                 if(newConfiguration || initialUpdate)
2602                 {
2603                         terminateThreads();
2604
2605                         SwiftConfig::Configuration configuration = {};
2606                         swiftConfig->getConfiguration(configuration);
2607
2608                         precacheVertex = !newConfiguration && configuration.precache;
2609                         precacheSetup = !newConfiguration && configuration.precache;
2610                         precachePixel = !newConfiguration && configuration.precache;
2611
2612                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2613                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2614                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2615
2616                         switch(configuration.textureSampleQuality)
2617                         {
2618                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2619                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2620                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2621                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2622                         }
2623
2624                         switch(configuration.mipmapQuality)
2625                         {
2626                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2627                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2628                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2629                         }
2630
2631                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2632
2633                         switch(configuration.transcendentalPrecision)
2634                         {
2635                         case 0:
2636                                 logPrecision = APPROXIMATE;
2637                                 expPrecision = APPROXIMATE;
2638                                 rcpPrecision = APPROXIMATE;
2639                                 rsqPrecision = APPROXIMATE;
2640                                 break;
2641                         case 1:
2642                                 logPrecision = PARTIAL;
2643                                 expPrecision = PARTIAL;
2644                                 rcpPrecision = PARTIAL;
2645                                 rsqPrecision = PARTIAL;
2646                                 break;
2647                         case 2:
2648                                 logPrecision = ACCURATE;
2649                                 expPrecision = ACCURATE;
2650                                 rcpPrecision = ACCURATE;
2651                                 rsqPrecision = ACCURATE;
2652                                 break;
2653                         case 3:
2654                                 logPrecision = WHQL;
2655                                 expPrecision = WHQL;
2656                                 rcpPrecision = WHQL;
2657                                 rsqPrecision = WHQL;
2658                                 break;
2659                         case 4:
2660                                 logPrecision = IEEE;
2661                                 expPrecision = IEEE;
2662                                 rcpPrecision = IEEE;
2663                                 rsqPrecision = IEEE;
2664                                 break;
2665                         default:
2666                                 logPrecision = ACCURATE;
2667                                 expPrecision = ACCURATE;
2668                                 rcpPrecision = ACCURATE;
2669                                 rsqPrecision = ACCURATE;
2670                                 break;
2671                         }
2672
2673                         switch(configuration.transparencyAntialiasing)
2674                         {
2675                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2676                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2677                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2678                         }
2679
2680                         switch(configuration.threadCount)
2681                         {
2682                         case -1: threadCount = CPUID::coreCount();        break;
2683                         case 0:  threadCount = CPUID::processAffinity();  break;
2684                         default: threadCount = configuration.threadCount; break;
2685                         }
2686
2687                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2688                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2689                         CPUID::setEnableSSE3(configuration.enableSSE3);
2690                         CPUID::setEnableSSE2(configuration.enableSSE2);
2691                         CPUID::setEnableSSE(configuration.enableSSE);
2692
2693                         for(int pass = 0; pass < 10; pass++)
2694                         {
2695                                 optimization[pass] = configuration.optimization[pass];
2696                         }
2697
2698                         forceWindowed = configuration.forceWindowed;
2699                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2700                         postBlendSRGB = configuration.postBlendSRGB;
2701                         exactColorRounding = configuration.exactColorRounding;
2702                         forceClearRegisters = configuration.forceClearRegisters;
2703
2704                 #ifndef NDEBUG
2705                         minPrimitives = configuration.minPrimitives;
2706                         maxPrimitives = configuration.maxPrimitives;
2707                 #endif
2708                 }
2709
2710                 if(!initialUpdate && !worker[0])
2711                 {
2712                         initializeThreads();
2713                 }
2714         }
2715 }