OSDN Git Service

Fix retaining the processing routines when no update.
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Renderer.hpp"
16
17 #include "Clipper.hpp"
18 #include "Math.hpp"
19 #include "FrameBuffer.hpp"
20 #include "Timer.hpp"
21 #include "Surface.hpp"
22 #include "Half.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
27 #include "CPUID.hpp"
28 #include "Memory.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
31 #include "Debug.hpp"
32 #include "Reactor/Reactor.hpp"
33
34 #undef max
35
36 bool disableServer = true;   // Passed to the SwiftConfig constructor when a Renderer is created
37
38 #ifndef NDEBUG
39 unsigned int minPrimitives = 1;   // Debug builds only: Renderer::draw() returns early when count is below this
40 unsigned int maxPrimitives = 1 << 21;   // Debug builds only: Renderer::draw() returns early when count is above this
41 #endif
42
43 namespace sw
44 {
45         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47         extern bool booleanFaceRegister;        // Assigned from Conventions in the Renderer constructor
48         extern bool fullPixelPositionRegister;  // Assigned from Conventions in the Renderer constructor
49         extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
50         extern bool secondaryColor;             // Specular lighting is applied after texturing
51
52         extern bool forceWindowed;
53         extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near
54         extern bool postBlendSRGB;
55         extern bool exactColorRounding;         // Assigned from the Renderer constructor argument of the same name
56         extern TransparencyAntialiasing transparencyAntialiasing;
57         extern bool forceClearRegisters;
58
59         extern bool precacheVertex;
60         extern bool precacheSetup;
61         extern bool precachePixel;
62
63         int batchSize = 128;     // Renderer::draw() divides this by the multisample count to get the per-batch size
64         int threadCount = 1;     // Renderer::draw() wakes a worker when > 1, otherwise runs taskLoop(0) on the calling thread
65         int unitCount = 1;
66         int clusterCount = 1;    // Number of per-cluster occlusion counters Renderer::draw() resets
67
68         TranscendentalPrecision logPrecision = ACCURATE;   // threadFunction() enables flush-to-zero and denormals-are-zero when this is below IEEE
69         TranscendentalPrecision expPrecision = ACCURATE;
70         TranscendentalPrecision rcpPrecision = ACCURATE;
71         TranscendentalPrecision rsqPrecision = ACCURATE;
72         bool perspectiveCorrection = true;
73
74         struct Parameters
75         {
                   // Argument record that Renderer::threadFunction() receives through its void* parameter.
76                 Renderer *renderer;   // Renderer whose threadLoop() is run
77                 int threadIndex;      // Index handed to threadLoop()
78         };
79
80         DrawCall::DrawCall()
81         {
82                 queries = 0;
83
84                 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
85                 vsDirtyConstI = 16;
86                 vsDirtyConstB = 16;
87
88                 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
89                 psDirtyConstI = 16;
90                 psDirtyConstB = 16;
91
92                 references = -1;
93
94                 data = (DrawData*)allocate(sizeof(DrawData));
95                 data->constants = &constants;
96         }
97
98         DrawCall::~DrawCall()
99         {
                   // Releases what the slot owns: the query list (null unless Renderer::draw()
                   // attached one) and the DrawData block allocated in the constructor.
100                 delete queries;
101
102                 deallocate(data);   // Pairs with allocate() in DrawCall::DrawCall()
103         }
104
105         Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
106         {
107                 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
108                 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
109                 sw::booleanFaceRegister = conventions.booleanFaceRegister;
110                 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
111                 sw::leadingVertexFirst = conventions.leadingVertexFirst;
112                 sw::secondaryColor = conventions.secondaryColor;
113                 sw::exactColorRounding = exactColorRounding;
114
115                 setRenderTarget(0, 0);
116                 clipper = new Clipper(symmetricNormalizedDepth);
117
118                 updateViewMatrix = true;
119                 updateBaseMatrix = true;
120                 updateProjectionMatrix = true;
121                 updateClipPlanes = true;
122
123                 #if PERF_HUD
124                         resetTimers();
125                 #endif
126
127                 for(int i = 0; i < 16; i++)
128                 {
129                         vertexTask[i] = 0;
130
131                         worker[i] = 0;
132                         resume[i] = 0;
133                         suspend[i] = 0;
134                 }
135
136                 threadsAwake = 0;
137                 resumeApp = new Event();
138
139                 currentDraw = 0;
140                 nextDraw = 0;
141
142                 qHead = 0;
143                 qSize = 0;
144
145                 for(int i = 0; i < 16; i++)
146                 {
147                         triangleBatch[i] = 0;
148                         primitiveBatch[i] = 0;
149                 }
150
151                 for(int draw = 0; draw < DRAW_COUNT; draw++)
152                 {
153                         drawCall[draw] = new DrawCall();
154                         drawList[draw] = drawCall[draw];
155                 }
156
157                 for(int unit = 0; unit < 16; unit++)
158                 {
159                         primitiveProgress[unit].init();
160                 }
161
162                 for(int cluster = 0; cluster < 16; cluster++)
163                 {
164                         pixelProgress[cluster].init();
165                 }
166
167                 clipFlags = 0;
168
169                 swiftConfig = new SwiftConfig(disableServer);
170                 updateConfiguration(true);
171
172                 sync = new Resource(0);
173         }
174
175         Renderer::~Renderer()
176         {
                    // Tears down what the constructor created: the sync resource, the clipper,
                    // the worker threads, the resumeApp event, every draw-call slot and the
                    // SwiftConfig object. The statement order is kept exactly as written.
177                 sync->destruct();
178
179                 delete clipper;
180                 clipper = 0;
181
182                 terminateThreads();   // Runs before the resumeApp event is deleted
183                 delete resumeApp;
184
185                 for(int draw = 0; draw < DRAW_COUNT; draw++)
186                 {
187                         delete drawCall[draw];   // Allocated one per slot in the constructor
188                 }
189
190                 delete swiftConfig;
191         }
192
193         void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
194         {
                    // Thin wrapper: forwards all arguments unchanged to blitter.clear().
195                 blitter.clear(pixel, format, dest, dRect, rgbaMask);
196         }
197
198         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
199         {
                    // Thin wrapper: forwards all arguments unchanged to blitter.blit().
200                 blitter.blit(source, sRect, dest, dRect, filter);
201         }
202
203         void Renderer::blit3D(Surface *source, Surface *dest)
204         {
                    // Thin wrapper: forwards both surfaces unchanged to blitter.blit3D().
205                 blitter.blit3D(source, dest);
206         }
207
208         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
209         {
                    // Captures the current context state into a free DrawCall slot and schedules it.
                    //
                    // drawType    - stored in the context and in the draw call.
                    // indexOffset - byte offset added to the locked index buffer pointer; only used
                    //               when the context has an index buffer.
                    // count       - primitive count; the draw is split into ceil(count / batch) batches.
                    // update      - when true the vertex/setup/pixel states and routines are rebuilt.
                    //               When false the routines kept from the previous draw are reused,
                    //               unless the multisample mask changed.
                    //
                    // One pass is made per supersample (q); passes whose sample mask is empty are skipped.
210                 #ifndef NDEBUG
                            // Debug-only filter: silently drop draws outside [minPrimitives, maxPrimitives].
211                         if(count < minPrimitives || count > maxPrimitives)
212                         {
213                                 return;
214                         }
215                 #endif
216
217                 context->drawType = drawType;
218
219                 updateConfiguration();
220                 updateClipper();
221
222                 int ss = context->getSuperSampleCount();
223                 int ms = context->getMultiSampleCount();
224
225                 for(int q = 0; q < ss; q++)
226                 {
                            // Select the ms-bit slice of the sample mask that belongs to supersample q.
                            // NOTE(review): the (32 - ms) shift assumes ms >= 1 - confirm getMultiSampleCount() never returns 0.
227                         unsigned int oldMultiSampleMask = context->multiSampleMask;
228                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
229
230                         if(!context->multiSampleMask)
231                         {
232                                 continue;
233                         }
234
                            // NOTE(review): no matching unlock is visible in this function; presumably
                            // released when the draw call completes - confirm.
235                         sync->lock(sw::PRIVATE);
236
                            // Rebuild state and routines only on request or when the sample mask changed;
                            // otherwise the member routines from the previous draw stay in use.
237                         if(update || oldMultiSampleMask != context->multiSampleMask)
238                         {
239                                 vertexState = VertexProcessor::update(drawType);
240                                 setupState = SetupProcessor::update();
241                                 pixelState = PixelProcessor::update();
242
243                                 vertexRoutine = VertexProcessor::routine(vertexState);
244                                 setupRoutine = SetupProcessor::routine(setupState);
245                                 pixelRoutine = PixelProcessor::routine(pixelState);
246                         }
247
248                         int batch = batchSize / ms;
249
                            // Pick the primitive setup member function. Wireframe and vertex fill
                            // modes force a batch size of 1.
250                         int (Renderer::*setupPrimitives)(int batch, int count);
251
252                         if(context->isDrawTriangle())
253                         {
254                                 switch(context->fillMode)
255                                 {
256                                 case FILL_SOLID:
257                                         setupPrimitives = &Renderer::setupSolidTriangles;
258                                         break;
259                                 case FILL_WIREFRAME:
260                                         setupPrimitives = &Renderer::setupWireframeTriangle;
261                                         batch = 1;
262                                         break;
263                                 case FILL_VERTEX:
264                                         setupPrimitives = &Renderer::setupVertexTriangle;
265                                         batch = 1;
266                                         break;
                                    // NOTE(review): setupPrimitives is left unassigned on this path; that only
                                    // matters if ASSERT does not stop execution - confirm.
267                                 default: ASSERT(false);
268                                 }
269                         }
270                         else if(context->isDrawLine())
271                         {
272                                 setupPrimitives = &Renderer::setupLines;
273                         }
274                         else   // Point draw
275                         {
276                                 setupPrimitives = &Renderer::setupPoints;
277                         }
278
                            // Find a free slot (references == -1) and enter it in the draw list;
                            // if none is free, wait on resumeApp and scan again.
279                         DrawCall *draw = 0;
280
281                         do
282                         {
283                                 for(int i = 0; i < DRAW_COUNT; i++)
284                                 {
285                                         if(drawCall[i]->references == -1)
286                                         {
287                                                 draw = drawCall[i];
288                                                 drawList[nextDraw % DRAW_COUNT] = draw;
289
290                                                 break;
291                                         }
292                                 }
293
294                                 if(!draw)
295                                 {
296                                         resumeApp->wait();
297                                 }
298                         }
299                         while(!draw);
300
301                         DrawData *data = draw->data;
302
                            // Attach the active queries, taking a reference on each. Primitives-written
                            // queries are included only when transform feedback and its query are both enabled.
303                         if(queries.size() != 0)
304                         {
305                                 draw->queries = new std::list<Query*>();
306                                 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
307                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
308                                 {
309                                         Query* q = *query;
310                                         if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
311                                         {
312                                                 atomicIncrement(&(q->reference));
313                                                 draw->queries->push_back(q);
314                                         }
315                                 }
316                         }
317
318                         draw->drawType = drawType;
319                         draw->batchSize = batch;
320
                            // Bind the three routines for this draw and record their entry points.
321                         vertexRoutine->bind();
322                         setupRoutine->bind();
323                         pixelRoutine->bind();
324
325                         draw->vertexRoutine = vertexRoutine;
326                         draw->setupRoutine = setupRoutine;
327                         draw->pixelRoutine = pixelRoutine;
328                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
329                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
330                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
331                         draw->setupPrimitives = setupPrimitives;
332                         draw->setupState = setupState;
333
                            // Record every vertex input and lock the resources that are present.
334                         for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
335                         {
336                                 draw->vertexStream[i] = context->input[i].resource;
337                                 data->input[i] = context->input[i].buffer;
338                                 data->stride[i] = context->input[i].stride;
339
340                                 if(draw->vertexStream[i])
341                                 {
342                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
343                                 }
344                         }
345
346                         if(context->indexBuffer)
347                         {
348                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
349                         }
350
351                         draw->indexBuffer = context->indexBuffer;
352
                            // Clear all texture slots, then fill in and lock the ones the pixel state samples.
353                         for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
354                         {
355                                 draw->texture[sampler] = 0;
356                         }
357
358                         for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
359                         {
360                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
361                                 {
362                                         draw->texture[sampler] = context->texture[sampler];
363                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texture is both read and written, use the same read/write lock as render targets
364
365                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
366                                 }
367                         }
368
                            // Pixel shader constants: copy as many entries as the slot's dirty counter
                            // says, then reset the counter to zero.
369                         if(context->pixelShader)
370                         {
371                                 if(draw->psDirtyConstF)
372                                 {
373                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
374                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
375                                         draw->psDirtyConstF = 0;
376                                 }
377
378                                 if(draw->psDirtyConstI)
379                                 {
380                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
381                                         draw->psDirtyConstI = 0;
382                                 }
383
384                                 if(draw->psDirtyConstB)
385                                 {
386                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
387                                         draw->psDirtyConstB = 0;
388                                 }
389
390                                 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
391                         }
392                         else
393                         {
394                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
395                                 {
396                                         draw->pUniformBuffers[i] = nullptr;
397                                 }
398                         }
399
                            // For pixel shader versions up to 0x0104, copy texture stage uniforms up to
                            // the first disabled stage (all 8 when a pixel shader is set).
400                         if(context->pixelShaderVersion() <= 0x0104)
401                         {
402                                 for(int stage = 0; stage < 8; stage++)
403                                 {
404                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
405                                         {
406                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
407                                         }
408                                         else break;
409                                 }
410                         }
411
412                         if(context->vertexShader)
413                         {
                                    // Vertex texture units are stored after the pixel texture units and are
                                    // only considered for shader version 0x0300 and above.
414                                 if(context->vertexShader->getVersion() >= 0x0300)
415                                 {
416                                         for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
417                                         {
418                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
419                                                 {
420                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
421                                                         draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
422
423                                                         data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
424                                                 }
425                                         }
426                                 }
427
                                    // Vertex shader constants follow the same dirty-counter scheme as the pixel shader ones.
428                                 if(draw->vsDirtyConstF)
429                                 {
430                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
431                                         draw->vsDirtyConstF = 0;
432                                 }
433
434                                 if(draw->vsDirtyConstI)
435                                 {
436                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
437                                         draw->vsDirtyConstI = 0;
438                                 }
439
440                                 if(draw->vsDirtyConstB)
441                                 {
442                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
443                                         draw->vsDirtyConstB = 0;
444                                 }
445
446                                 if(context->vertexShader->instanceIdDeclared)
447                                 {
448                                         data->instanceID = context->instanceID;
449                                 }
450
451                                 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
452                                 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
453                         }
454                         else
455                         {
                                    // No vertex shader: copy the ff state instead and reset the vertex constant
                                    // dirty counters to their constructor values.
456                                 data->ff = ff;
457
458                                 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
459                                 draw->vsDirtyConstI = 16;
460                                 draw->vsDirtyConstB = 16;
461
462                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
463                                 {
464                                         draw->vUniformBuffers[i] = nullptr;
465                                 }
466
467                                 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
468                                 {
469                                         draw->transformFeedbackBuffers[i] = nullptr;
470                                 }
471                         }
472
                            // Copy optional state blocks only when the corresponding feature is active.
473                         if(pixelState.stencilActive)
474                         {
475                                 data->stencil[0] = stencil;
476                                 data->stencil[1] = stencilCCW;
477                         }
478
479                         if(pixelState.fogActive)
480                         {
481                                 data->fog = fog;
482                         }
483
484                         if(setupState.isDrawPoint)
485                         {
486                                 data->point = point;
487                         }
488
489                         data->lineWidth = context->lineWidth;
490
491                         data->factor = factor;
492
                            // Alpha-to-coverage: spread per-sample thresholds around the alpha reference.
                            // Only 2 and 4 multisamples are handled; anything else asserts.
493                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
494                         {
495                                 float ref = context->alphaReference * (1.0f / 255.0f);
496                                 float margin = sw::min(ref, 1.0f - ref);
497
498                                 if(ms == 4)
499                                 {
500                                         data->a2c0 = replicate(ref - margin * 0.6f);
501                                         data->a2c1 = replicate(ref - margin * 0.2f);
502                                         data->a2c2 = replicate(ref + margin * 0.2f);
503                                         data->a2c3 = replicate(ref + margin * 0.6f);
504                                 }
505                                 else if(ms == 2)
506                                 {
507                                         data->a2c0 = replicate(ref - margin * 0.3f);
508                                         data->a2c1 = replicate(ref + margin * 0.3f);
509                                 }
510                                 else ASSERT(false);
511                         }
512
                            // Reset the per-cluster occlusion counters for this draw.
513                         if(pixelState.occlusionEnabled)
514                         {
515                                 for(int cluster = 0; cluster < clusterCount; cluster++)
516                                 {
517                                         data->occlusion[cluster] = 0;
518                                 }
519                         }
520
521                         #if PERF_PROFILE
522                                 for(int cluster = 0; cluster < clusterCount; cluster++)
523                                 {
524                                         for(int i = 0; i < PERF_TIMERS; i++)
525                                         {
526                                                 data->cycles[i][cluster] = 0;
527                                         }
528                                 }
529                         #endif
530
531                         // Viewport
532                         {
                                    // W and H are the half extents, (X0, Y0) the viewport center, N and F the depth bounds.
533                                 float W = 0.5f * viewport.width;
534                                 float H = 0.5f * viewport.height;
535                                 float X0 = viewport.x0 + W;
536                                 float Y0 = viewport.y0 + H;
537                                 float N = viewport.minZ;
538                                 float F = viewport.maxZ;
539                                 float Z = F - N;
540
541                                 if(context->isDrawTriangle(false))
542                                 {
543                                         N += depthBias;
544                                 }
545
546                                 if(complementaryDepthBuffer)
547                                 {
548                                         Z = -Z;
549                                         N = 1 - N;
550                                 }
551
552                                 static const float X[5][16] =   // Fragment offsets
553                                 {
554                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
555                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
556                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
557                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
558                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
559                                 };
560
561                                 static const float Y[5][16] =   // Fragment offsets
562                                 {
563                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
564                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
565                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
566                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
567                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
568                                 };
569
                                    // Row of the offset tables for this supersample count; column q is the current pass.
570                                 int s = sw::log2(ss);
571
572                                 data->Wx16 = replicate(W * 16);
573                                 data->Hx16 = replicate(H * 16);
574                                 data->X0x16 = replicate(X0 * 16 - 8);
575                                 data->Y0x16 = replicate(Y0 * 16 - 8);
576                                 data->XXXX = replicate(X[s][q] / W);
577                                 data->YYYY = replicate(Y[s][q] / H);
578                                 data->halfPixelX = replicate(0.5f / W);
579                                 data->halfPixelY = replicate(0.5f / H);
580                                 data->viewportHeight = abs(viewport.height);
581                                 data->slopeDepthBias = slopeDepthBias;
582                                 data->depthRange = Z;
583                                 data->depthNear = N;
584                                 draw->clipFlags = clipFlags;
585
                                    // Copy only the user clip planes whose flag bit is set.
586                                 if(clipFlags)
587                                 {
588                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
589                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
590                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
591                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
592                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
593                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
594                                 }
595                         }
596
597                         // Target
598                         {
                                    // Color, depth and stencil buffers are locked at index q * ms, i.e. the
                                    // first sample of the current supersample pass.
599                                 for(int index = 0; index < RENDERTARGETS; index++)
600                                 {
601                                         draw->renderTarget[index] = context->renderTarget[index];
602
603                                         if(draw->renderTarget[index])
604                                         {
605                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
606                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
607                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
608                                         }
609                                 }
610
611                                 draw->depthBuffer = context->depthBuffer;
612                                 draw->stencilBuffer = context->stencilBuffer;
613
614                                 if(draw->depthBuffer)
615                                 {
616                                         data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
617                                         data->depthPitchB = context->depthBuffer->getInternalPitchB();
618                                         data->depthSliceB = context->depthBuffer->getInternalSliceB();
619                                 }
620
621                                 if(draw->stencilBuffer)
622                                 {
623                                         data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(q * ms, MANAGED);
624                                         data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
625                                         data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
626                                 }
627                         }
628
629                         // Scissor
630                         {
631                                 data->scissorX0 = scissor.x0;
632                                 data->scissorX1 = scissor.x1;
633                                 data->scissorY0 = scissor.y0;
634                                 data->scissorY1 = scissor.y1;
635                         }
636
637                         draw->primitive = 0;
638                         draw->count = count;
639
                            // Number of batches, rounded up. Writing it also takes the slot out of the
                            // free state (references == -1).
640                         draw->references = (count + batch - 1) / batch;
641
                            // Advance the draw counter under the scheduler mutex.
642                         schedulerMutex.lock();
643                         nextDraw++;
644                         schedulerMutex.unlock();
645
646                         if(threadCount > 1)
647                         {
                                    // Multi-threaded: if no thread is awake, wait for thread 0 to suspend,
                                    // then hand it a RESUME task and signal it.
648                                 if(!threadsAwake)
649                                 {
650                                         suspend[0]->wait();
651
652                                         threadsAwake = 1;
653                                         task[0].type = Task::RESUME;
654
655                                         resume[0]->signal();
656                                 }
657                         }
658                         else   // Use main thread for draw execution
659                         {
660                                 threadsAwake = 1;
661                                 task[0].type = Task::RESUME;
662
663                                 taskLoop(0);
664                         }
665                 }
666         }
667
668         void Renderer::threadFunction(void *parameters)
669         {
670                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
671                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
672
673                 if(logPrecision < IEEE)
674                 {
675                         CPUID::setFlushToZero(true);
676                         CPUID::setDenormalsAreZero(true);
677                 }
678
679                 renderer->threadLoop(threadIndex);
680         }
681
682         void Renderer::threadLoop(int threadIndex)
683         {
684                 while(!exitThreads)
685                 {
686                         taskLoop(threadIndex);
687
688                         suspend[threadIndex]->signal();
689                         resume[threadIndex]->wait();
690                 }
691         }
692
693         void Renderer::taskLoop(int threadIndex)
694         {
695                 while(task[threadIndex].type != Task::SUSPEND)
696                 {
697                         scheduleTask(threadIndex);
698                         executeTask(threadIndex);
699                 }
700         }
701
702         void Renderer::findAvailableTasks()
703         {
                    // Appends newly runnable work to the taskQueue ring: pixel tasks first,
                    // then primitive tasks. The only caller visible here, scheduleTask(),
                    // invokes this while holding schedulerMutex.
704                 // Find pixel tasks
                    // A cluster that is not busy is given the batch it needs next: a unit that
                    // holds processed primitives of the cluster's current draw call and whose
                    // first primitive is exactly where the cluster left off.
705                 for(int cluster = 0; cluster < clusterCount; cluster++)
706                 {
707                         if(!pixelProgress[cluster].executing)
708                         {
709                                 for(int unit = 0; unit < unitCount; unit++)
710                                 {
711                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
712                                         {
713                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
714                                                 {
715                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
716                                                         {
717                                                                 Task &task = taskQueue[qHead];   // Local reference to the queue slot; shadows the per-thread task[] member in this scope
718                                                                 task.type = Task::PIXELS;
719                                                                 task.primitiveUnit = unit;
720                                                                 task.pixelCluster = cluster;
721
722                                                                 pixelProgress[cluster].executing = true;   // Reset to false at the end of finishRendering()
723
724                                                                 // Commit to the task queue
725                                                                 qHead = (qHead + 1) % 32;   // Ring index; 32 is presumably the capacity of taskQueue (confirm in the header)
726                                                                 qSize++;
727
728                                                                 break;   // At most one pixel task per cluster per call
729                                                         }
730                                                 }
731                                         }
732                                 }
733                         }
734                 }
735
736                 // Find primitive tasks
737                 if(currentDraw == nextDraw)
738                 {
739                         return;   // No more primitives to process
740                 }
741
                    // Hand one batch of the current draw call to every unit that is idle.
742                 for(int unit = 0; unit < unitCount; unit++)
743                 {
744                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
745
746                         if(draw->primitive >= draw->count)   // Every batch of this draw call has been handed out
747                         {
748                                 currentDraw++;
749
750                                 if(currentDraw == nextDraw)
751                                 {
752                                         return;   // No more primitives to process
753                                 }
754
755                                 draw = drawList[currentDraw % DRAW_COUNT];
756                         }
757
758                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
759                         {
760                                 int primitive = draw->primitive;
761                                 int count = draw->count;
762                                 int batch = draw->batchSize;
763
764                                 primitiveProgress[unit].drawCall = currentDraw;
765                                 primitiveProgress[unit].firstPrimitive = primitive;
766                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;   // The last batch of a draw call may be partial
767
768                                 draw->primitive += batch;   // May step past draw->count; the >= test above accounts for that
769
770                                 Task &task = taskQueue[qHead];
771                                 task.type = Task::PRIMITIVES;
772                                 task.primitiveUnit = unit;
773
774                                 primitiveProgress[unit].references = -1;   // Non-zero, non-positive: unit is claimed but not yet offered to pixel tasks; executeTask() sets it to clusterCount after setup
775
776                                 // Commit to the task queue
777                                 qHead = (qHead + 1) % 32;
778                                 qSize++;
779                         }
780                 }
781         }
782
783         void Renderer::scheduleTask(int threadIndex)
784         {
                    // Chooses what thread threadIndex does next: the oldest queued task is
                    // copied into task[threadIndex], or the thread is marked SUSPEND when the
                    // queue is empty. Everything between lock() and unlock() below runs with
                    // schedulerMutex held.
785                 schedulerMutex.lock();
786
                    // Refill when fewer tasks are queued than (threads not awake + 1).
787                 if((int)qSize < threadCount - threadsAwake + 1)
788                 {
789                         findAvailableTasks();
790                 }
791
792                 if(qSize != 0)
793                 {
                            // The oldest entry sits qSize slots behind qHead.
                            // NOTE(review): when qHead < qSize this depends on the subtraction wrapping
                            // as unsigned before % 32 — confirm that qHead and qSize are unsigned.
794                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
795                         qSize--;
796
797                         if(threadsAwake != threadCount)
798                         {
                                    // Wake suspended threads while queued tasks outnumber the other awake threads.
799                                 int wakeup = qSize - threadsAwake + 1;
800
801                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
802                                 {
803                                         if(task[i].type == Task::SUSPEND)
804                                         {
                                                    // threadLoop() signals suspend[i] once the worker has left taskLoop()
                                                    // and then waits on resume[i]; the handshake below mirrors that.
805                                                 suspend[i]->wait();
806                                                 task[i].type = Task::RESUME;
807                                                 resume[i]->signal();
808
809                                                 threadsAwake++;
810                                                 wakeup--;
811                                         }
812                                 }
813                         }
814                 }
815                 else
816                 {
                            // Nothing to run: taskLoop() returns when it sees SUSPEND.
817                         task[threadIndex].type = Task::SUSPEND;
818
819                         threadsAwake--;
820                 }
821
822                 schedulerMutex.unlock();
823         }
824
825         void Renderer::executeTask(int threadIndex)
826         {
827                 #if PERF_HUD
828                         int64_t startTick = Timer::ticks();
829                 #endif
830
831                 switch(task[threadIndex].type)
832                 {
833                 case Task::PRIMITIVES:
834                         {
835                                 int unit = task[threadIndex].primitiveUnit;
836
837                                 int input = primitiveProgress[unit].firstPrimitive;
838                                 int count = primitiveProgress[unit].primitiveCount;
839                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
840                                 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
841
842                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
843
844                                 #if PERF_HUD
845                                         int64_t time = Timer::ticks();
846                                         vertexTime[threadIndex] += time - startTick;
847                                         startTick = time;
848                                 #endif
849
850                                 int visible = 0;
851
852                                 if(!draw->setupState.rasterizerDiscard)
853                                 {
854                                         visible = (this->*setupPrimitives)(unit, count);
855                                 }
856
857                                 primitiveProgress[unit].visible = visible;
858                                 primitiveProgress[unit].references = clusterCount;
859
860                                 #if PERF_HUD
861                                         setupTime[threadIndex] += Timer::ticks() - startTick;
862                                 #endif
863                         }
864                         break;
865                 case Task::PIXELS:
866                         {
867                                 int unit = task[threadIndex].primitiveUnit;
868                                 int visible = primitiveProgress[unit].visible;
869
870                                 if(visible > 0)
871                                 {
872                                         int cluster = task[threadIndex].pixelCluster;
873                                         Primitive *primitive = primitiveBatch[unit];
874                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
875                                         DrawData *data = draw->data;
876                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
877
878                                         pixelRoutine(primitive, visible, cluster, data);
879                                 }
880
881                                 finishRendering(task[threadIndex]);
882
883                                 #if PERF_HUD
884                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
885                                 #endif
886                         }
887                         break;
888                 case Task::RESUME:
889                         break;
890                 case Task::SUSPEND:
891                         break;
892                 default:
893                         ASSERT(false);
894                 }
895         }
896
897         void Renderer::synchronize()
898         {
                    // Acquires sync with sw::PUBLIC and releases it again immediately; no
                    // other work is done while it is held.
                    // NOTE(review): presumably the lock() call blocks until draws in flight have
                    // released sync (finishRendering() unlocks it when a draw call completes) —
                    // confirm against the declaration of sync.
899                 sync->lock(sw::PUBLIC);
900                 sync->unlock();
901         }
902
903         void Renderer::finishRendering(Task &pixelTask)
904         {
                    // Retires one pixel task (called from executeTask() after the PIXELS case).
                    // Records the cluster's progress, drops one reference on the primitive unit,
                    // and, when that was the last reference of the last outstanding batch of the
                    // draw call, resolves queries, unlocks every resource held by the draw call
                    // and signals resumeApp.
905                 int unit = pixelTask.primitiveUnit;
906                 int cluster = pixelTask.pixelCluster;
907
908                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
909                 DrawData &data = *draw.data;
910                 int primitive = primitiveProgress[unit].firstPrimitive;
911                 int count = primitiveProgress[unit].primitiveCount;
912                 int processedPrimitives = primitive + count;   // End index of the batch retired by this call
913
914                 pixelProgress[cluster].processedPrimitives = processedPrimitives;
915
                    // Whole draw call rendered by this cluster: move it on to the next draw call.
916                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
917                 {
918                         pixelProgress[cluster].drawCall++;
919                         pixelProgress[cluster].processedPrimitives = 0;
920                 }
921
                    // executeTask() set references to clusterCount for this batch.
                    // NOTE(review): the == 0 tests assume atomicDecrement returns the value after
                    // the decrement — confirm against its definition.
922                 int ref = atomicDecrement(&primitiveProgress[unit].references);
923
924                 if(ref == 0)
925                 {
                            // Last cluster done with this batch; draw() initialised draw.references
                            // to the number of batches in the draw call.
926                         ref = atomicDecrement(&draw.references);
927
928                         if(ref == 0)
929                         {
930                                 #if PERF_PROFILE
                                            // The loop variable below shadows the outer 'cluster'.
931                                         for(int cluster = 0; cluster < clusterCount; cluster++)
932                                         {
933                                                 for(int i = 0; i < PERF_TIMERS; i++)
934                                                 {
935                                                         profiler.cycles[i] += data.cycles[i][cluster];
936                                                 }
937                                         }
938                                 #endif
939
940                                 if(draw.queries)
941                                 {
942                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
943                                         {
944                                                 Query *query = *q;
945
946                                                 switch(query->type)
947                                                 {
948                                                 case Query::FRAGMENTS_PASSED:
                                                            // Sum the per-cluster occlusion counters into the query.
949                                                         for(int cluster = 0; cluster < clusterCount; cluster++)
950                                                         {
951                                                                 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
952                                                         }
953                                                         break;
954                                                 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
955                                                         atomicAdd((volatile int*)&query->data, processedPrimitives);
956                                                         break;
957                                                 default:
958                                                         break;
959                                                 }
960
961                                                 atomicDecrement(&query->reference);
962                                         }
963
                                            // The list is deleted here, so this function owns it at this point.
964                                         delete draw.queries;
965                                         draw.queries = 0;
966                                 }
967
                                    // Unlock everything the draw call holds: render targets, depth and stencil
                                    // buffers, textures, vertex streams, index buffer, uniform buffers and
                                    // transform feedback buffers.
968                                 for(int i = 0; i < RENDERTARGETS; i++)
969                                 {
970                                         if(draw.renderTarget[i])
971                                         {
972                                                 draw.renderTarget[i]->unlockInternal();
973                                         }
974                                 }
975
976                                 if(draw.depthBuffer)
977                                 {
978                                         draw.depthBuffer->unlockInternal();
979                                 }
980
981                                 if(draw.stencilBuffer)
982                                 {
983                                         draw.stencilBuffer->unlockStencil();
984                                 }
985
986                                 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
987                                 {
988                                         if(draw.texture[i])
989                                         {
990                                                 draw.texture[i]->unlock();
991                                         }
992                                 }
993
994                                 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
995                                 {
996                                         if(draw.vertexStream[i])
997                                         {
998                                                 draw.vertexStream[i]->unlock();
999                                         }
1000                                 }
1001
1002                                 if(draw.indexBuffer)
1003                                 {
1004                                         draw.indexBuffer->unlock();
1005                                 }
1006
1007                                 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1008                                 {
1009                                         if(draw.pUniformBuffers[i])
1010                                         {
1011                                                 draw.pUniformBuffers[i]->unlock();
1012                                         }
1013                                         if(draw.vUniformBuffers[i])
1014                                         {
1015                                                 draw.vUniformBuffers[i]->unlock();
1016                                         }
1017                                 }
1018
1019                                 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1020                                 {
1021                                         if(draw.transformFeedbackBuffers[i])
1022                                         {
1023                                                 draw.transformFeedbackBuffers[i]->unlock();
1024                                         }
1025                                 }
1026
                                     // The three routines are dereferenced without a null check.
1027                                 draw.vertexRoutine->unbind();
1028                                 draw.setupRoutine->unbind();
1029                                 draw.pixelRoutine->unbind();
1030
1031                                 sync->unlock();
1032
                                     // NOTE(review): -1 presumably marks this DrawCall slot as free for reuse
                                     // by draw() — confirm where draw.references is tested.
1033                                 draw.references = -1;
1034                                 resumeApp->signal();
1035                         }
1036                 }
1037
                     // findAvailableTasks() only schedules clusters whose executing flag is false.
1038                 pixelProgress[cluster].executing = false;
1039         }
1040
1041         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1042         {
1043                 Triangle *triangle = triangleBatch[unit];
1044                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1045                 DrawData *data = draw->data;
1046                 VertexTask *task = vertexTask[thread];
1047
1048                 const void *indices = data->indices;
1049                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1050
1051                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1052                 {
1053                         task->vertexCache.clear();
1054                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1055                 }
1056
1057                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1058
1059                 switch(draw->drawType)
1060                 {
1061                 case DRAW_POINTLIST:
1062                         {
1063                                 unsigned int index = start;
1064
1065                                 for(unsigned int i = 0; i < triangleCount; i++)
1066                                 {
1067                                         batch[i][0] = index;
1068                                         batch[i][1] = index;
1069                                         batch[i][2] = index;
1070
1071                                         index += 1;
1072                                 }
1073                         }
1074                         break;
1075                 case DRAW_LINELIST:
1076                         {
1077                                 unsigned int index = 2 * start;
1078
1079                                 for(unsigned int i = 0; i < triangleCount; i++)
1080                                 {
1081                                         batch[i][0] = index + 0;
1082                                         batch[i][1] = index + 1;
1083                                         batch[i][2] = index + 1;
1084
1085                                         index += 2;
1086                                 }
1087                         }
1088                         break;
1089                 case DRAW_LINESTRIP:
1090                         {
1091                                 unsigned int index = start;
1092
1093                                 for(unsigned int i = 0; i < triangleCount; i++)
1094                                 {
1095                                         batch[i][0] = index + 0;
1096                                         batch[i][1] = index + 1;
1097                                         batch[i][2] = index + 1;
1098
1099                                         index += 1;
1100                                 }
1101                         }
1102                         break;
1103                 case DRAW_LINELOOP:
1104                         {
1105                                 unsigned int index = start;
1106
1107                                 for(unsigned int i = 0; i < triangleCount; i++)
1108                                 {
1109                                         batch[i][0] = (index + 0) % loop;
1110                                         batch[i][1] = (index + 1) % loop;
1111                                         batch[i][2] = (index + 1) % loop;
1112
1113                                         index += 1;
1114                                 }
1115                         }
1116                         break;
1117                 case DRAW_TRIANGLELIST:
1118                         {
1119                                 unsigned int index = 3 * start;
1120
1121                                 for(unsigned int i = 0; i < triangleCount; i++)
1122                                 {
1123                                         batch[i][0] = index + 0;
1124                                         batch[i][1] = index + 1;
1125                                         batch[i][2] = index + 2;
1126
1127                                         index += 3;
1128                                 }
1129                         }
1130                         break;
1131                 case DRAW_TRIANGLESTRIP:
1132                         {
1133                                 unsigned int index = start;
1134
1135                                 for(unsigned int i = 0; i < triangleCount; i++)
1136                                 {
1137                                         batch[i][0] = index + 0;
1138                                         batch[i][1] = index + (index & 1) + 1;
1139                                         batch[i][2] = index + (~index & 1) + 1;
1140
1141                                         index += 1;
1142                                 }
1143                         }
1144                         break;
1145                 case DRAW_TRIANGLEFAN:
1146                         {
1147                                 unsigned int index = start;
1148
1149                                 for(unsigned int i = 0; i < triangleCount; i++)
1150                                 {
1151                                         batch[i][0] = index + 1;
1152                                         batch[i][1] = index + 2;
1153                                         batch[i][2] = 0;
1154
1155                                         index += 1;
1156                                 }
1157                         }
1158                         break;
1159                 case DRAW_INDEXEDPOINTLIST8:
1160                         {
1161                                 const unsigned char *index = (const unsigned char*)indices + start;
1162
1163                                 for(unsigned int i = 0; i < triangleCount; i++)
1164                                 {
1165                                         batch[i][0] = *index;
1166                                         batch[i][1] = *index;
1167                                         batch[i][2] = *index;
1168
1169                                         index += 1;
1170                                 }
1171                         }
1172                         break;
1173                 case DRAW_INDEXEDPOINTLIST16:
1174                         {
1175                                 const unsigned short *index = (const unsigned short*)indices + start;
1176
1177                                 for(unsigned int i = 0; i < triangleCount; i++)
1178                                 {
1179                                         batch[i][0] = *index;
1180                                         batch[i][1] = *index;
1181                                         batch[i][2] = *index;
1182
1183                                         index += 1;
1184                                 }
1185                         }
1186                         break;
1187                 case DRAW_INDEXEDPOINTLIST32:
1188                         {
1189                                 const unsigned int *index = (const unsigned int*)indices + start;
1190
1191                                 for(unsigned int i = 0; i < triangleCount; i++)
1192                                 {
1193                                         batch[i][0] = *index;
1194                                         batch[i][1] = *index;
1195                                         batch[i][2] = *index;
1196
1197                                         index += 1;
1198                                 }
1199                         }
1200                         break;
1201                 case DRAW_INDEXEDLINELIST8:
1202                         {
1203                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1204
1205                                 for(unsigned int i = 0; i < triangleCount; i++)
1206                                 {
1207                                         batch[i][0] = index[0];
1208                                         batch[i][1] = index[1];
1209                                         batch[i][2] = index[1];
1210
1211                                         index += 2;
1212                                 }
1213                         }
1214                         break;
1215                 case DRAW_INDEXEDLINELIST16:
1216                         {
1217                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1218
1219                                 for(unsigned int i = 0; i < triangleCount; i++)
1220                                 {
1221                                         batch[i][0] = index[0];
1222                                         batch[i][1] = index[1];
1223                                         batch[i][2] = index[1];
1224
1225                                         index += 2;
1226                                 }
1227                         }
1228                         break;
1229                 case DRAW_INDEXEDLINELIST32:
1230                         {
1231                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1232
1233                                 for(unsigned int i = 0; i < triangleCount; i++)
1234                                 {
1235                                         batch[i][0] = index[0];
1236                                         batch[i][1] = index[1];
1237                                         batch[i][2] = index[1];
1238
1239                                         index += 2;
1240                                 }
1241                         }
1242                         break;
1243                 case DRAW_INDEXEDLINESTRIP8:
1244                         {
1245                                 const unsigned char *index = (const unsigned char*)indices + start;
1246
1247                                 for(unsigned int i = 0; i < triangleCount; i++)
1248                                 {
1249                                         batch[i][0] = index[0];
1250                                         batch[i][1] = index[1];
1251                                         batch[i][2] = index[1];
1252
1253                                         index += 1;
1254                                 }
1255                         }
1256                         break;
1257                 case DRAW_INDEXEDLINESTRIP16:
1258                         {
1259                                 const unsigned short *index = (const unsigned short*)indices + start;
1260
1261                                 for(unsigned int i = 0; i < triangleCount; i++)
1262                                 {
1263                                         batch[i][0] = index[0];
1264                                         batch[i][1] = index[1];
1265                                         batch[i][2] = index[1];
1266
1267                                         index += 1;
1268                                 }
1269                         }
1270                         break;
1271                 case DRAW_INDEXEDLINESTRIP32:
1272                         {
1273                                 const unsigned int *index = (const unsigned int*)indices + start;
1274
1275                                 for(unsigned int i = 0; i < triangleCount; i++)
1276                                 {
1277                                         batch[i][0] = index[0];
1278                                         batch[i][1] = index[1];
1279                                         batch[i][2] = index[1];
1280
1281                                         index += 1;
1282                                 }
1283                         }
1284                         break;
1285                 case DRAW_INDEXEDLINELOOP8:
1286                         {
1287                                 const unsigned char *index = (const unsigned char*)indices;
1288
1289                                 for(unsigned int i = 0; i < triangleCount; i++)
1290                                 {
1291                                         batch[i][0] = index[(start + i + 0) % loop];
1292                                         batch[i][1] = index[(start + i + 1) % loop];
1293                                         batch[i][2] = index[(start + i + 1) % loop];
1294                                 }
1295                         }
1296                         break;
1297                 case DRAW_INDEXEDLINELOOP16:
1298                         {
1299                                 const unsigned short *index = (const unsigned short*)indices;
1300
1301                                 for(unsigned int i = 0; i < triangleCount; i++)
1302                                 {
1303                                         batch[i][0] = index[(start + i + 0) % loop];
1304                                         batch[i][1] = index[(start + i + 1) % loop];
1305                                         batch[i][2] = index[(start + i + 1) % loop];
1306                                 }
1307                         }
1308                         break;
1309                 case DRAW_INDEXEDLINELOOP32:
1310                         {
1311                                 const unsigned int *index = (const unsigned int*)indices;
1312
1313                                 for(unsigned int i = 0; i < triangleCount; i++)
1314                                 {
1315                                         batch[i][0] = index[(start + i + 0) % loop];
1316                                         batch[i][1] = index[(start + i + 1) % loop];
1317                                         batch[i][2] = index[(start + i + 1) % loop];
1318                                 }
1319                         }
1320                         break;
1321                 case DRAW_INDEXEDTRIANGLELIST8:
1322                         {
1323                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1324
1325                                 for(unsigned int i = 0; i < triangleCount; i++)
1326                                 {
1327                                         batch[i][0] = index[0];
1328                                         batch[i][1] = index[1];
1329                                         batch[i][2] = index[2];
1330
1331                                         index += 3;
1332                                 }
1333                         }
1334                         break;
1335                 case DRAW_INDEXEDTRIANGLELIST16:
1336                         {
1337                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1338
1339                                 for(unsigned int i = 0; i < triangleCount; i++)
1340                                 {
1341                                         batch[i][0] = index[0];
1342                                         batch[i][1] = index[1];
1343                                         batch[i][2] = index[2];
1344
1345                                         index += 3;
1346                                 }
1347                         }
1348                         break;
1349                 case DRAW_INDEXEDTRIANGLELIST32:
1350                         {
1351                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1352
1353                                 for(unsigned int i = 0; i < triangleCount; i++)
1354                                 {
1355                                         batch[i][0] = index[0];
1356                                         batch[i][1] = index[1];
1357                                         batch[i][2] = index[2];
1358
1359                                         index += 3;
1360                                 }
1361                         }
1362                         break;
1363                 case DRAW_INDEXEDTRIANGLESTRIP8:
1364                         {
1365                                 const unsigned char *index = (const unsigned char*)indices + start;
1366
1367                                 for(unsigned int i = 0; i < triangleCount; i++)
1368                                 {
1369                                         batch[i][0] = index[0];
1370                                         batch[i][1] = index[((start + i) & 1) + 1];
1371                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1372
1373                                         index += 1;
1374                                 }
1375                         }
1376                         break;
1377                 case DRAW_INDEXEDTRIANGLESTRIP16:
1378                         {
1379                                 const unsigned short *index = (const unsigned short*)indices + start;
1380
1381                                 for(unsigned int i = 0; i < triangleCount; i++)
1382                                 {
1383                                         batch[i][0] = index[0];
1384                                         batch[i][1] = index[((start + i) & 1) + 1];
1385                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1386
1387                                         index += 1;
1388                                 }
1389                         }
1390                         break;
1391                 case DRAW_INDEXEDTRIANGLESTRIP32:
1392                         {
1393                                 const unsigned int *index = (const unsigned int*)indices + start;
1394
1395                                 for(unsigned int i = 0; i < triangleCount; i++)
1396                                 {
1397                                         batch[i][0] = index[0];
1398                                         batch[i][1] = index[((start + i) & 1) + 1];
1399                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1400
1401                                         index += 1;
1402                                 }
1403                         }
1404                         break;
1405                 case DRAW_INDEXEDTRIANGLEFAN8:
1406                         {
1407                                 const unsigned char *index = (const unsigned char*)indices;
1408
1409                                 for(unsigned int i = 0; i < triangleCount; i++)
1410                                 {
1411                                         batch[i][0] = index[start + i + 1];
1412                                         batch[i][1] = index[start + i + 2];
1413                                         batch[i][2] = index[0];
1414                                 }
1415                         }
1416                         break;
1417                 case DRAW_INDEXEDTRIANGLEFAN16:
1418                         {
1419                                 const unsigned short *index = (const unsigned short*)indices;
1420
1421                                 for(unsigned int i = 0; i < triangleCount; i++)
1422                                 {
1423                                         batch[i][0] = index[start + i + 1];
1424                                         batch[i][1] = index[start + i + 2];
1425                                         batch[i][2] = index[0];
1426                                 }
1427                         }
1428                         break;
1429                 case DRAW_INDEXEDTRIANGLEFAN32:
1430                         {
1431                                 const unsigned int *index = (const unsigned int*)indices;
1432
1433                                 for(unsigned int i = 0; i < triangleCount; i++)
1434                                 {
1435                                         batch[i][0] = index[start + i + 1];
1436                                         batch[i][1] = index[start + i + 2];
1437                                         batch[i][2] = index[0];
1438                                 }
1439                         }
1440                         break;
1441                 case DRAW_QUADLIST:
1442                         {
1443                                 unsigned int index = 4 * start / 2;
1444
1445                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1446                                 {
1447                                         batch[i+0][0] = index + 0;
1448                                         batch[i+0][1] = index + 1;
1449                                         batch[i+0][2] = index + 2;
1450
1451                                         batch[i+1][0] = index + 0;
1452                                         batch[i+1][1] = index + 2;
1453                                         batch[i+1][2] = index + 3;
1454
1455                                         index += 4;
1456                                 }
1457                         }
1458                         break;
1459                 default:
1460                         ASSERT(false);
1461                         return;
1462                 }
1463
1464                 task->primitiveStart = start;
1465                 task->vertexCount = triangleCount * 3;
1466                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1467         }
1468
1469         int Renderer::setupSolidTriangles(int unit, int count)
1470         {
1471                 Triangle *triangle = triangleBatch[unit];
1472                 Primitive *primitive = primitiveBatch[unit];
1473
1474                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1475                 SetupProcessor::State &state = draw.setupState;
1476                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1477
1478                 int ms = state.multiSample;
1479                 int pos = state.positionRegister;
1480                 const DrawData *data = draw.data;
1481                 int visible = 0;
1482
1483                 for(int i = 0; i < count; i++, triangle++)
1484                 {
1485                         Vertex &v0 = triangle->v0;
1486                         Vertex &v1 = triangle->v1;
1487                         Vertex &v2 = triangle->v2;
1488
1489                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1490                         {
1491                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1492
1493                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1494
1495                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1496                                 {
1497                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1498                                         {
1499                                                 continue;
1500                                         }
1501                                 }
1502
1503                                 if(setupRoutine(primitive, triangle, &polygon, data))
1504                                 {
1505                                         primitive += ms;
1506                                         visible++;
1507                                 }
1508                         }
1509                 }
1510
1511                 return visible;
1512         }
1513
1514         int Renderer::setupWireframeTriangle(int unit, int count)
1515         {
1516                 Triangle *triangle = triangleBatch[unit];
1517                 Primitive *primitive = primitiveBatch[unit];
1518                 int visible = 0;
1519
1520                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1521                 SetupProcessor::State &state = draw.setupState;
1522
1523                 const Vertex &v0 = triangle[0].v0;
1524                 const Vertex &v1 = triangle[0].v1;
1525                 const Vertex &v2 = triangle[0].v2;
1526
1527                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1528
1529                 if(state.cullMode == CULL_CLOCKWISE)
1530                 {
1531                         if(d >= 0) return 0;
1532                 }
1533                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1534                 {
1535                         if(d <= 0) return 0;
1536                 }
1537
1538                 // Copy attributes
1539                 triangle[1].v0 = v1;
1540                 triangle[1].v1 = v2;
1541                 triangle[2].v0 = v2;
1542                 triangle[2].v1 = v0;
1543
1544                 if(state.color[0][0].flat)   // FIXME
1545                 {
1546                         for(int i = 0; i < 2; i++)
1547                         {
1548                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1549                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1550                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1551                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1552                         }
1553                 }
1554
1555                 for(int i = 0; i < 3; i++)
1556                 {
1557                         if(setupLine(*primitive, *triangle, draw))
1558                         {
1559                                 primitive->area = 0.5f * d;
1560
1561                                 primitive++;
1562                                 visible++;
1563                         }
1564
1565                         triangle++;
1566                 }
1567
1568                 return visible;
1569         }
1570
1571         int Renderer::setupVertexTriangle(int unit, int count)
1572         {
1573                 Triangle *triangle = triangleBatch[unit];
1574                 Primitive *primitive = primitiveBatch[unit];
1575                 int visible = 0;
1576
1577                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1578                 SetupProcessor::State &state = draw.setupState;
1579
1580                 const Vertex &v0 = triangle[0].v0;
1581                 const Vertex &v1 = triangle[0].v1;
1582                 const Vertex &v2 = triangle[0].v2;
1583
1584                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1585
1586                 if(state.cullMode == CULL_CLOCKWISE)
1587                 {
1588                         if(d >= 0) return 0;
1589                 }
1590                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1591                 {
1592                         if(d <= 0) return 0;
1593                 }
1594
1595                 // Copy attributes
1596                 triangle[1].v0 = v1;
1597                 triangle[2].v0 = v2;
1598
1599                 for(int i = 0; i < 3; i++)
1600                 {
1601                         if(setupPoint(*primitive, *triangle, draw))
1602                         {
1603                                 primitive->area = 0.5f * d;
1604
1605                                 primitive++;
1606                                 visible++;
1607                         }
1608
1609                         triangle++;
1610                 }
1611
1612                 return visible;
1613         }
1614
1615         int Renderer::setupLines(int unit, int count)
1616         {
1617                 Triangle *triangle = triangleBatch[unit];
1618                 Primitive *primitive = primitiveBatch[unit];
1619                 int visible = 0;
1620
1621                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1622                 SetupProcessor::State &state = draw.setupState;
1623
1624                 int ms = state.multiSample;
1625
1626                 for(int i = 0; i < count; i++)
1627                 {
1628                         if(setupLine(*primitive, *triangle, draw))
1629                         {
1630                                 primitive += ms;
1631                                 visible++;
1632                         }
1633
1634                         triangle++;
1635                 }
1636
1637                 return visible;
1638         }
1639
1640         int Renderer::setupPoints(int unit, int count)
1641         {
1642                 Triangle *triangle = triangleBatch[unit];
1643                 Primitive *primitive = primitiveBatch[unit];
1644                 int visible = 0;
1645
1646                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1647                 SetupProcessor::State &state = draw.setupState;
1648
1649                 int ms = state.multiSample;
1650
1651                 for(int i = 0; i < count; i++)
1652                 {
1653                         if(setupPoint(*primitive, *triangle, draw))
1654                         {
1655                                 primitive += ms;
1656                                 visible++;
1657                         }
1658
1659                         triangle++;
1660                 }
1661
1662                 return visible;
1663         }
1664
1665         bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1666         {
1667                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1668                 const SetupProcessor::State &state = draw.setupState;
1669                 const DrawData &data = *draw.data;
1670
1671                 float lineWidth = data.lineWidth;
1672
1673                 Vertex &v0 = triangle.v0;
1674                 Vertex &v1 = triangle.v1;
1675
1676                 int pos = state.positionRegister;
1677
1678                 const float4 &P0 = v0.v[pos];
1679                 const float4 &P1 = v1.v[pos];
1680
1681                 if(P0.w <= 0 && P1.w <= 0)
1682                 {
1683                         return false;
1684                 }
1685
1686                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1687                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1688
1689                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1690                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1691
1692                 if(dx == 0 && dy == 0)
1693                 {
1694                         return false;
1695                 }
1696
1697                 if(false)   // Rectangle
1698                 {
1699                         float4 P[4];
1700                         int C[4];
1701
1702                         P[0] = P0;
1703                         P[1] = P1;
1704                         P[2] = P1;
1705                         P[3] = P0;
1706
1707                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1708
1709                         dx *= scale;
1710                         dy *= scale;
1711
1712                         float dx0w = dx * P0.w / W;
1713                         float dy0h = dy * P0.w / H;
1714                         float dx0h = dx * P0.w / H;
1715                         float dy0w = dy * P0.w / W;
1716
1717                         float dx1w = dx * P1.w / W;
1718                         float dy1h = dy * P1.w / H;
1719                         float dx1h = dx * P1.w / H;
1720                         float dy1w = dy * P1.w / W;
1721
1722                         P[0].x += -dy0w + -dx0w;
1723                         P[0].y += -dx0h + +dy0h;
1724                         C[0] = clipper->computeClipFlags(P[0]);
1725
1726                         P[1].x += -dy1w + +dx1w;
1727                         P[1].y += -dx1h + +dy1h;
1728                         C[1] = clipper->computeClipFlags(P[1]);
1729
1730                         P[2].x += +dy1w + +dx1w;
1731                         P[2].y += +dx1h + -dy1h;
1732                         C[2] = clipper->computeClipFlags(P[2]);
1733
1734                         P[3].x += +dy0w + -dx0w;
1735                         P[3].y += +dx0h + +dy0h;
1736                         C[3] = clipper->computeClipFlags(P[3]);
1737
1738                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1739                         {
1740                                 Polygon polygon(P, 4);
1741
1742                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1743
1744                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1745                                 {
1746                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1747                                         {
1748                                                 return false;
1749                                         }
1750                                 }
1751
1752                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1753                         }
1754                 }
1755                 else   // Diamond test convention
1756                 {
1757                         float4 P[8];
1758                         int C[8];
1759
1760                         P[0] = P0;
1761                         P[1] = P0;
1762                         P[2] = P0;
1763                         P[3] = P0;
1764                         P[4] = P1;
1765                         P[5] = P1;
1766                         P[6] = P1;
1767                         P[7] = P1;
1768
1769                         float dx0 = lineWidth * 0.5f * P0.w / W;
1770                         float dy0 = lineWidth * 0.5f * P0.w / H;
1771
1772                         float dx1 = lineWidth * 0.5f * P1.w / W;
1773                         float dy1 = lineWidth * 0.5f * P1.w / H;
1774
1775                         P[0].x += -dx0;
1776                         C[0] = clipper->computeClipFlags(P[0]);
1777
1778                         P[1].y += +dy0;
1779                         C[1] = clipper->computeClipFlags(P[1]);
1780
1781                         P[2].x += +dx0;
1782                         C[2] = clipper->computeClipFlags(P[2]);
1783
1784                         P[3].y += -dy0;
1785                         C[3] = clipper->computeClipFlags(P[3]);
1786
1787                         P[4].x += -dx1;
1788                         C[4] = clipper->computeClipFlags(P[4]);
1789
1790                         P[5].y += +dy1;
1791                         C[5] = clipper->computeClipFlags(P[5]);
1792
1793                         P[6].x += +dx1;
1794                         C[6] = clipper->computeClipFlags(P[6]);
1795
1796                         P[7].y += -dy1;
1797                         C[7] = clipper->computeClipFlags(P[7]);
1798
1799                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1800                         {
1801                                 float4 L[6];
1802
1803                                 if(dx > -dy)
1804                                 {
1805                                         if(dx > dy)   // Right
1806                                         {
1807                                                 L[0] = P[0];
1808                                                 L[1] = P[1];
1809                                                 L[2] = P[5];
1810                                                 L[3] = P[6];
1811                                                 L[4] = P[7];
1812                                                 L[5] = P[3];
1813                                         }
1814                                         else   // Down
1815                                         {
1816                                                 L[0] = P[0];
1817                                                 L[1] = P[4];
1818                                                 L[2] = P[5];
1819                                                 L[3] = P[6];
1820                                                 L[4] = P[2];
1821                                                 L[5] = P[3];
1822                                         }
1823                                 }
1824                                 else
1825                                 {
1826                                         if(dx > dy)   // Up
1827                                         {
1828                                                 L[0] = P[0];
1829                                                 L[1] = P[1];
1830                                                 L[2] = P[2];
1831                                                 L[3] = P[6];
1832                                                 L[4] = P[7];
1833                                                 L[5] = P[4];
1834                                         }
1835                                         else   // Left
1836                                         {
1837                                                 L[0] = P[1];
1838                                                 L[1] = P[2];
1839                                                 L[2] = P[3];
1840                                                 L[3] = P[7];
1841                                                 L[4] = P[4];
1842                                                 L[5] = P[5];
1843                                         }
1844                                 }
1845
1846                                 Polygon polygon(L, 6);
1847
1848                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1849
1850                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1851                                 {
1852                                         if(!clipper->clip(polygon, clipFlagsOr, draw))
1853                                         {
1854                                                 return false;
1855                                         }
1856                                 }
1857
1858                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1859                         }
1860                 }
1861
1862                 return false;
1863         }
1864
1865         bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1866         {
1867                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1868                 const SetupProcessor::State &state = draw.setupState;
1869                 const DrawData &data = *draw.data;
1870
1871                 Vertex &v = triangle.v0;
1872
1873                 float pSize;
1874
1875                 int pts = state.pointSizeRegister;
1876
1877                 if(state.pointSizeRegister != Unused)
1878                 {
1879                         pSize = v.v[pts].y;
1880                 }
1881                 else
1882                 {
1883                         pSize = data.point.pointSize[0];
1884                 }
1885
1886                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1887
1888                 float4 P[4];
1889                 int C[4];
1890
1891                 int pos = state.positionRegister;
1892
1893                 P[0] = v.v[pos];
1894                 P[1] = v.v[pos];
1895                 P[2] = v.v[pos];
1896                 P[3] = v.v[pos];
1897
1898                 const float X = pSize * P[0].w * data.halfPixelX[0];
1899                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1900
1901                 P[0].x -= X;
1902                 P[0].y += Y;
1903                 C[0] = clipper->computeClipFlags(P[0]);
1904
1905                 P[1].x += X;
1906                 P[1].y += Y;
1907                 C[1] = clipper->computeClipFlags(P[1]);
1908
1909                 P[2].x += X;
1910                 P[2].y -= Y;
1911                 C[2] = clipper->computeClipFlags(P[2]);
1912
1913                 P[3].x -= X;
1914                 P[3].y -= Y;
1915                 C[3] = clipper->computeClipFlags(P[3]);
1916
1917                 triangle.v1 = triangle.v0;
1918                 triangle.v2 = triangle.v0;
1919
1920                 triangle.v1.X += iround(16 * 0.5f * pSize);
1921                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1922
1923                 Polygon polygon(P, 4);
1924
1925                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1926                 {
1927                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1928
1929                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1930                         {
1931                                 if(!clipper->clip(polygon, clipFlagsOr, draw))
1932                                 {
1933                                         return false;
1934                                 }
1935                         }
1936
1937                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1938                 }
1939
1940                 return false;
1941         }
1942
1943         void Renderer::initializeThreads()
1944         {
1945                 unitCount = ceilPow2(threadCount);
1946                 clusterCount = ceilPow2(threadCount);
1947
1948                 for(int i = 0; i < unitCount; i++)
1949                 {
1950                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1951                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1952                 }
1953
1954                 for(int i = 0; i < threadCount; i++)
1955                 {
1956                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1957                         vertexTask[i]->vertexCache.drawCall = -1;
1958
1959                         task[i].type = Task::SUSPEND;
1960
1961                         resume[i] = new Event();
1962                         suspend[i] = new Event();
1963
1964                         Parameters parameters;
1965                         parameters.threadIndex = i;
1966                         parameters.renderer = this;
1967
1968                         exitThreads = false;
1969                         worker[i] = new Thread(threadFunction, &parameters);
1970
1971                         suspend[i]->wait();
1972                         suspend[i]->signal();
1973                 }
1974         }
1975
1976         void Renderer::terminateThreads()
1977         {
1978                 while(threadsAwake != 0)
1979                 {
1980                         Thread::sleep(1);
1981                 }
1982
1983                 for(int thread = 0; thread < threadCount; thread++)
1984                 {
1985                         if(worker[thread])
1986                         {
1987                                 exitThreads = true;
1988                                 resume[thread]->signal();
1989                                 worker[thread]->join();
1990
1991                                 delete worker[thread];
1992                                 worker[thread] = 0;
1993                                 delete resume[thread];
1994                                 resume[thread] = 0;
1995                                 delete suspend[thread];
1996                                 suspend[thread] = 0;
1997                         }
1998
1999                         deallocate(vertexTask[thread]);
2000                         vertexTask[thread] = 0;
2001                 }
2002
2003                 for(int i = 0; i < 16; i++)
2004                 {
2005                         deallocate(triangleBatch[i]);
2006                         triangleBatch[i] = 0;
2007
2008                         deallocate(primitiveBatch[i]);
2009                         primitiveBatch[i] = 0;
2010                 }
2011         }
2012
2013         void Renderer::loadConstants(const VertexShader *vertexShader)
2014         {
2015                 if(!vertexShader) return;
2016
2017                 size_t count = vertexShader->getLength();
2018
2019                 for(size_t i = 0; i < count; i++)
2020                 {
2021                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2022
2023                         if(instruction->opcode == Shader::OPCODE_DEF)
2024                         {
2025                                 int index = instruction->dst.index;
2026                                 float value[4];
2027
2028                                 value[0] = instruction->src[0].value[0];
2029                                 value[1] = instruction->src[0].value[1];
2030                                 value[2] = instruction->src[0].value[2];
2031                                 value[3] = instruction->src[0].value[3];
2032
2033                                 setVertexShaderConstantF(index, value);
2034                         }
2035                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2036                         {
2037                                 int index = instruction->dst.index;
2038                                 int integer[4];
2039
2040                                 integer[0] = instruction->src[0].integer[0];
2041                                 integer[1] = instruction->src[0].integer[1];
2042                                 integer[2] = instruction->src[0].integer[2];
2043                                 integer[3] = instruction->src[0].integer[3];
2044
2045                                 setVertexShaderConstantI(index, integer);
2046                         }
2047                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2048                         {
2049                                 int index = instruction->dst.index;
2050                                 int boolean = instruction->src[0].boolean[0];
2051
2052                                 setVertexShaderConstantB(index, &boolean);
2053                         }
2054                 }
2055         }
2056
2057         void Renderer::loadConstants(const PixelShader *pixelShader)
2058         {
2059                 if(!pixelShader) return;
2060
2061                 size_t count = pixelShader->getLength();
2062
2063                 for(size_t i = 0; i < count; i++)
2064                 {
2065                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2066
2067                         if(instruction->opcode == Shader::OPCODE_DEF)
2068                         {
2069                                 int index = instruction->dst.index;
2070                                 float value[4];
2071
2072                                 value[0] = instruction->src[0].value[0];
2073                                 value[1] = instruction->src[0].value[1];
2074                                 value[2] = instruction->src[0].value[2];
2075                                 value[3] = instruction->src[0].value[3];
2076
2077                                 setPixelShaderConstantF(index, value);
2078                         }
2079                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2080                         {
2081                                 int index = instruction->dst.index;
2082                                 int integer[4];
2083
2084                                 integer[0] = instruction->src[0].integer[0];
2085                                 integer[1] = instruction->src[0].integer[1];
2086                                 integer[2] = instruction->src[0].integer[2];
2087                                 integer[3] = instruction->src[0].integer[3];
2088
2089                                 setPixelShaderConstantI(index, integer);
2090                         }
2091                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2092                         {
2093                                 int index = instruction->dst.index;
2094                                 int boolean = instruction->src[0].boolean[0];
2095
2096                                 setPixelShaderConstantB(index, &boolean);
2097                         }
2098                 }
2099         }
2100
2101         void Renderer::setIndexBuffer(Resource *indexBuffer)
2102         {
2103                 context->indexBuffer = indexBuffer;
2104         }
2105
2106         void Renderer::setMultiSampleMask(unsigned int mask)
2107         {
2108                 context->sampleMask = mask;
2109         }
2110
2111         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2112         {
2113                 sw::transparencyAntialiasing = transparencyAntialiasing;
2114         }
2115
2116         bool Renderer::isReadWriteTexture(int sampler)
2117         {
2118                 for(int index = 0; index < RENDERTARGETS; index++)
2119                 {
2120                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2121                         {
2122                                 return true;
2123                         }
2124                 }
2125
2126                 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2127                 {
2128                         return true;
2129                 }
2130
2131                 return false;
2132         }
2133
2134         void Renderer::updateClipper()
2135         {
2136                 if(updateClipPlanes)
2137                 {
2138                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2139                         {
2140                                 const Matrix &scissorWorld = getViewTransform();
2141
2142                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2143                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2144                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2145                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2146                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2147                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2148                         }
2149                         else   // User plane in clip space
2150                         {
2151                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2152                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2153                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2154                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2155                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2156                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2157                         }
2158
2159                         updateClipPlanes = false;
2160                 }
2161         }
2162
2163         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2164         {
2165                 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2166
2167                 context->texture[sampler] = resource;
2168         }
2169
2170         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2171         {
2172                 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2173
2174                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2175         }
2176
2177         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2178         {
2179                 if(type == SAMPLER_PIXEL)
2180                 {
2181                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2182                 }
2183                 else
2184                 {
2185                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2186                 }
2187         }
2188
2189         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2190         {
2191                 if(type == SAMPLER_PIXEL)
2192                 {
2193                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2194                 }
2195                 else
2196                 {
2197                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2198                 }
2199         }
2200
2201         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2202         {
2203                 if(type == SAMPLER_PIXEL)
2204                 {
2205                         PixelProcessor::setGatherEnable(sampler, enable);
2206                 }
2207                 else
2208                 {
2209                         VertexProcessor::setGatherEnable(sampler, enable);
2210                 }
2211         }
2212
2213         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2214         {
2215                 if(type == SAMPLER_PIXEL)
2216                 {
2217                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2218                 }
2219                 else
2220                 {
2221                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2222                 }
2223         }
2224
2225         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2226         {
2227                 if(type == SAMPLER_PIXEL)
2228                 {
2229                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2230                 }
2231                 else
2232                 {
2233                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2234                 }
2235         }
2236
2237         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2238         {
2239                 if(type == SAMPLER_PIXEL)
2240                 {
2241                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2242                 }
2243                 else
2244                 {
2245                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2246                 }
2247         }
2248
2249         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2250         {
2251                 if(type == SAMPLER_PIXEL)
2252                 {
2253                         PixelProcessor::setReadSRGB(sampler, sRGB);
2254                 }
2255                 else
2256                 {
2257                         VertexProcessor::setReadSRGB(sampler, sRGB);
2258                 }
2259         }
2260
2261         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2262         {
2263                 if(type == SAMPLER_PIXEL)
2264                 {
2265                         PixelProcessor::setMipmapLOD(sampler, bias);
2266                 }
2267                 else
2268                 {
2269                         VertexProcessor::setMipmapLOD(sampler, bias);
2270                 }
2271         }
2272
2273         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2274         {
2275                 if(type == SAMPLER_PIXEL)
2276                 {
2277                         PixelProcessor::setBorderColor(sampler, borderColor);
2278                 }
2279                 else
2280                 {
2281                         VertexProcessor::setBorderColor(sampler, borderColor);
2282                 }
2283         }
2284
2285         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2286         {
2287                 if(type == SAMPLER_PIXEL)
2288                 {
2289                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2290                 }
2291                 else
2292                 {
2293                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2294                 }
2295         }
2296
2297         void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2298         {
2299                 if(type == SAMPLER_PIXEL)
2300                 {
2301                         PixelProcessor::setSwizzleR(sampler, swizzleR);
2302                 }
2303                 else
2304                 {
2305                         VertexProcessor::setSwizzleR(sampler, swizzleR);
2306                 }
2307         }
2308
2309         void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2310         {
2311                 if(type == SAMPLER_PIXEL)
2312                 {
2313                         PixelProcessor::setSwizzleG(sampler, swizzleG);
2314                 }
2315                 else
2316                 {
2317                         VertexProcessor::setSwizzleG(sampler, swizzleG);
2318                 }
2319         }
2320
2321         void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2322         {
2323                 if(type == SAMPLER_PIXEL)
2324                 {
2325                         PixelProcessor::setSwizzleB(sampler, swizzleB);
2326                 }
2327                 else
2328                 {
2329                         VertexProcessor::setSwizzleB(sampler, swizzleB);
2330                 }
2331         }
2332
2333         void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2334         {
2335                 if(type == SAMPLER_PIXEL)
2336                 {
2337                         PixelProcessor::setSwizzleA(sampler, swizzleA);
2338                 }
2339                 else
2340                 {
2341                         VertexProcessor::setSwizzleA(sampler, swizzleA);
2342                 }
2343         }
2344
2345         void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2346         {
2347                 if(type == SAMPLER_PIXEL)
2348                 {
2349                         PixelProcessor::setBaseLevel(sampler, baseLevel);
2350                 }
2351                 else
2352                 {
2353                         VertexProcessor::setBaseLevel(sampler, baseLevel);
2354                 }
2355         }
2356
2357         void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2358         {
2359                 if(type == SAMPLER_PIXEL)
2360                 {
2361                         PixelProcessor::setMaxLevel(sampler, maxLevel);
2362                 }
2363                 else
2364                 {
2365                         VertexProcessor::setMaxLevel(sampler, maxLevel);
2366                 }
2367         }
2368
2369         void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2370         {
2371                 if(type == SAMPLER_PIXEL)
2372                 {
2373                         PixelProcessor::setMinLod(sampler, minLod);
2374                 }
2375                 else
2376                 {
2377                         VertexProcessor::setMinLod(sampler, minLod);
2378                 }
2379         }
2380
2381         void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2382         {
2383                 if(type == SAMPLER_PIXEL)
2384                 {
2385                         PixelProcessor::setMaxLod(sampler, maxLod);
2386                 }
2387                 else
2388                 {
2389                         VertexProcessor::setMaxLod(sampler, maxLod);
2390                 }
2391         }
2392
2393         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2394         {
2395                 context->setPointSpriteEnable(pointSpriteEnable);
2396         }
2397
2398         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2399         {
2400                 context->setPointScaleEnable(pointScaleEnable);
2401         }
2402
2403         void Renderer::setLineWidth(float width)
2404         {
2405                 context->lineWidth = width;
2406         }
2407
2408         void Renderer::setDepthBias(float bias)
2409         {
2410                 depthBias = bias;
2411         }
2412
2413         void Renderer::setSlopeDepthBias(float slopeBias)
2414         {
2415                 slopeDepthBias = slopeBias;
2416         }
2417
2418         void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2419         {
2420                 context->rasterizerDiscard = rasterizerDiscard;
2421         }
2422
2423         void Renderer::setPixelShader(const PixelShader *shader)
2424         {
2425                 context->pixelShader = shader;
2426
2427                 loadConstants(shader);
2428         }
2429
2430         void Renderer::setVertexShader(const VertexShader *shader)
2431         {
2432                 context->vertexShader = shader;
2433
2434                 loadConstants(shader);
2435         }
2436
2437         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2438         {
2439                 for(int i = 0; i < DRAW_COUNT; i++)
2440                 {
2441                         if(drawCall[i]->psDirtyConstF < index + count)
2442                         {
2443                                 drawCall[i]->psDirtyConstF = index + count;
2444                         }
2445                 }
2446
2447                 for(int i = 0; i < count; i++)
2448                 {
2449                         PixelProcessor::setFloatConstant(index + i, value);
2450                         value += 4;
2451                 }
2452         }
2453
2454         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2455         {
2456                 for(int i = 0; i < DRAW_COUNT; i++)
2457                 {
2458                         if(drawCall[i]->psDirtyConstI < index + count)
2459                         {
2460                                 drawCall[i]->psDirtyConstI = index + count;
2461                         }
2462                 }
2463
2464                 for(int i = 0; i < count; i++)
2465                 {
2466                         PixelProcessor::setIntegerConstant(index + i, value);
2467                         value += 4;
2468                 }
2469         }
2470
2471         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2472         {
2473                 for(int i = 0; i < DRAW_COUNT; i++)
2474                 {
2475                         if(drawCall[i]->psDirtyConstB < index + count)
2476                         {
2477                                 drawCall[i]->psDirtyConstB = index + count;
2478                         }
2479                 }
2480
2481                 for(int i = 0; i < count; i++)
2482                 {
2483                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2484                         boolean++;
2485                 }
2486         }
2487
2488         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2489         {
2490                 for(int i = 0; i < DRAW_COUNT; i++)
2491                 {
2492                         if(drawCall[i]->vsDirtyConstF < index + count)
2493                         {
2494                                 drawCall[i]->vsDirtyConstF = index + count;
2495                         }
2496                 }
2497
2498                 for(int i = 0; i < count; i++)
2499                 {
2500                         VertexProcessor::setFloatConstant(index + i, value);
2501                         value += 4;
2502                 }
2503         }
2504
2505         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2506         {
2507                 for(int i = 0; i < DRAW_COUNT; i++)
2508                 {
2509                         if(drawCall[i]->vsDirtyConstI < index + count)
2510                         {
2511                                 drawCall[i]->vsDirtyConstI = index + count;
2512                         }
2513                 }
2514
2515                 for(int i = 0; i < count; i++)
2516                 {
2517                         VertexProcessor::setIntegerConstant(index + i, value);
2518                         value += 4;
2519                 }
2520         }
2521
2522         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2523         {
2524                 for(int i = 0; i < DRAW_COUNT; i++)
2525                 {
2526                         if(drawCall[i]->vsDirtyConstB < index + count)
2527                         {
2528                                 drawCall[i]->vsDirtyConstB = index + count;
2529                         }
2530                 }
2531
2532                 for(int i = 0; i < count; i++)
2533                 {
2534                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2535                         boolean++;
2536                 }
2537         }
2538
2539         void Renderer::setModelMatrix(const Matrix &M, int i)
2540         {
2541                 VertexProcessor::setModelMatrix(M, i);
2542         }
2543
2544         void Renderer::setViewMatrix(const Matrix &V)
2545         {
2546                 VertexProcessor::setViewMatrix(V);
2547                 updateClipPlanes = true;
2548         }
2549
2550         void Renderer::setBaseMatrix(const Matrix &B)
2551         {
2552                 VertexProcessor::setBaseMatrix(B);
2553                 updateClipPlanes = true;
2554         }
2555
2556         void Renderer::setProjectionMatrix(const Matrix &P)
2557         {
2558                 VertexProcessor::setProjectionMatrix(P);
2559                 updateClipPlanes = true;
2560         }
2561
2562         void Renderer::addQuery(Query *query)
2563         {
2564                 queries.push_back(query);
2565         }
2566
2567         void Renderer::removeQuery(Query *query)
2568         {
2569                 queries.remove(query);
2570         }
2571
2572         #if PERF_HUD
2573                 int Renderer::getThreadCount()
2574                 {
2575                         return threadCount;
2576                 }
2577
2578                 int64_t Renderer::getVertexTime(int thread)
2579                 {
2580                         return vertexTime[thread];
2581                 }
2582
2583                 int64_t Renderer::getSetupTime(int thread)
2584                 {
2585                         return setupTime[thread];
2586                 }
2587
2588                 int64_t Renderer::getPixelTime(int thread)
2589                 {
2590                         return pixelTime[thread];
2591                 }
2592
2593                 void Renderer::resetTimers()
2594                 {
2595                         for(int thread = 0; thread < threadCount; thread++)
2596                         {
2597                                 vertexTime[thread] = 0;
2598                                 setupTime[thread] = 0;
2599                                 pixelTime[thread] = 0;
2600                         }
2601                 }
2602         #endif
2603
2604         void Renderer::setViewport(const Viewport &viewport)
2605         {
2606                 this->viewport = viewport;
2607         }
2608
2609         void Renderer::setScissor(const Rect &scissor)
2610         {
2611                 this->scissor = scissor;
2612         }
2613
2614         void Renderer::setClipFlags(int flags)
2615         {
2616                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2617         }
2618
2619         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2620         {
2621                 if(index < MAX_CLIP_PLANES)
2622                 {
2623                         userPlane[index] = plane;
2624                 }
2625                 else ASSERT(false);
2626
2627                 updateClipPlanes = true;
2628         }
2629
2630         void Renderer::updateConfiguration(bool initialUpdate)
2631         {
2632                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2633
2634                 if(newConfiguration || initialUpdate)
2635                 {
2636                         terminateThreads();
2637
2638                         SwiftConfig::Configuration configuration = {};
2639                         swiftConfig->getConfiguration(configuration);
2640
2641                         precacheVertex = !newConfiguration && configuration.precache;
2642                         precacheSetup = !newConfiguration && configuration.precache;
2643                         precachePixel = !newConfiguration && configuration.precache;
2644
2645                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2646                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2647                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2648
2649                         switch(configuration.textureSampleQuality)
2650                         {
2651                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2652                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2653                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2654                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2655                         }
2656
2657                         switch(configuration.mipmapQuality)
2658                         {
2659                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2660                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2661                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2662                         }
2663
2664                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2665
2666                         switch(configuration.transcendentalPrecision)
2667                         {
2668                         case 0:
2669                                 logPrecision = APPROXIMATE;
2670                                 expPrecision = APPROXIMATE;
2671                                 rcpPrecision = APPROXIMATE;
2672                                 rsqPrecision = APPROXIMATE;
2673                                 break;
2674                         case 1:
2675                                 logPrecision = PARTIAL;
2676                                 expPrecision = PARTIAL;
2677                                 rcpPrecision = PARTIAL;
2678                                 rsqPrecision = PARTIAL;
2679                                 break;
2680                         case 2:
2681                                 logPrecision = ACCURATE;
2682                                 expPrecision = ACCURATE;
2683                                 rcpPrecision = ACCURATE;
2684                                 rsqPrecision = ACCURATE;
2685                                 break;
2686                         case 3:
2687                                 logPrecision = WHQL;
2688                                 expPrecision = WHQL;
2689                                 rcpPrecision = WHQL;
2690                                 rsqPrecision = WHQL;
2691                                 break;
2692                         case 4:
2693                                 logPrecision = IEEE;
2694                                 expPrecision = IEEE;
2695                                 rcpPrecision = IEEE;
2696                                 rsqPrecision = IEEE;
2697                                 break;
2698                         default:
2699                                 logPrecision = ACCURATE;
2700                                 expPrecision = ACCURATE;
2701                                 rcpPrecision = ACCURATE;
2702                                 rsqPrecision = ACCURATE;
2703                                 break;
2704                         }
2705
2706                         switch(configuration.transparencyAntialiasing)
2707                         {
2708                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2709                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2710                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2711                         }
2712
2713                         switch(configuration.threadCount)
2714                         {
2715                         case -1: threadCount = CPUID::coreCount();        break;
2716                         case 0:  threadCount = CPUID::processAffinity();  break;
2717                         default: threadCount = configuration.threadCount; break;
2718                         }
2719
2720                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2721                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2722                         CPUID::setEnableSSE3(configuration.enableSSE3);
2723                         CPUID::setEnableSSE2(configuration.enableSSE2);
2724                         CPUID::setEnableSSE(configuration.enableSSE);
2725
2726                         for(int pass = 0; pass < 10; pass++)
2727                         {
2728                                 optimization[pass] = configuration.optimization[pass];
2729                         }
2730
2731                         forceWindowed = configuration.forceWindowed;
2732                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2733                         postBlendSRGB = configuration.postBlendSRGB;
2734                         exactColorRounding = configuration.exactColorRounding;
2735                         forceClearRegisters = configuration.forceClearRegisters;
2736
2737                 #ifndef NDEBUG
2738                         minPrimitives = configuration.minPrimitives;
2739                         maxPrimitives = configuration.maxPrimitives;
2740                 #endif
2741                 }
2742
2743                 if(!initialUpdate && !worker[0])
2744                 {
2745                         initializeThreads();
2746                 }
2747         }
2748 }