OSDN Git Service

90db545afef9f6f9172e2b05752650f822df550b
[android-x86/external-swiftshader.git] / src / Device / Renderer.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Renderer.hpp"
16
17 #include "Clipper.hpp"
18 #include "Polygon.hpp"
19 #include "Primitive.hpp"
20 #include "Vertex.hpp"
21 #include "Pipeline/Constants.hpp"
22 #include "Pipeline/SpirvShader.hpp"
23 #include "Reactor/Reactor.hpp"
24 #include "System/Debug.hpp"
25 #include "System/Half.hpp"
26 #include "System/Math.hpp"
27 #include "System/Memory.hpp"
28 #include "System/Timer.hpp"
29 #include "Vulkan/VkConfig.h"
30 #include "Vulkan/VkDevice.hpp"
31 #include "Vulkan/VkFence.hpp"
32 #include "Vulkan/VkImageView.hpp"
33 #include "Vulkan/VkQueryPool.hpp"
34
35 #include "marl/containers.h"
36 #include "marl/defer.h"
37 #include "marl/trace.h"
38
39 #undef max
40
#ifndef NDEBUG
// Primitive-count bounds that exist only in debug builds (NDEBUG undefined).
// They have external linkage and are not const, so they can be changed at run time.
unsigned int minPrimitives = 1u;
unsigned int maxPrimitives = 1u << 21;
#endif
45
46 namespace sw {
47
48 template<typename T>
49 inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount)
50 {
51         bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
52
53         switch(topology)
54         {
55                 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
56                 {
57                         auto index = start;
58                         auto pointBatch = &(batch[0][0]);
59                         for(unsigned int i = 0; i < triangleCount; i++)
60                         {
61                                 *pointBatch++ = indices[index++];
62                         }
63
64                         // Repeat the last index to allow for SIMD width overrun.
65                         index--;
66                         for(unsigned int i = 0; i < 3; i++)
67                         {
68                                 *pointBatch++ = indices[index];
69                         }
70                         break;
71                 }
72                 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
73                 {
74                         auto index = 2 * start;
75                         for(unsigned int i = 0; i < triangleCount; i++)
76                         {
77                                 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
78                                 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
79                                 batch[i][2] = indices[index + 1];
80
81                                 index += 2;
82                         }
83                         break;
84                 }
85                 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
86                 {
87                         auto index = start;
88                         for(unsigned int i = 0; i < triangleCount; i++)
89                         {
90                                 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
91                                 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
92                                 batch[i][2] = indices[index + 1];
93
94                                 index += 1;
95                         }
96                         break;
97                 }
98                 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
99                 {
100                         auto index = 3 * start;
101                         for(unsigned int i = 0; i < triangleCount; i++)
102                         {
103                                 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
104                                 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
105                                 batch[i][2] = indices[index + (provokeFirst ? 2 : 1)];
106
107                                 index += 3;
108                         }
109                         break;
110                 }
111                 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
112                 {
113                         auto index = start;
114                         for(unsigned int i = 0; i < triangleCount; i++)
115                         {
116                                 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
117                                 batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)];
118                                 batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)];
119
120                                 index += 1;
121                         }
122                         break;
123                 }
124                 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
125                 {
126                         auto index = start + 1;
127                         for(unsigned int i = 0; i < triangleCount; i++)
128                         {
129                                 batch[i][provokeFirst ? 0 : 2] = indices[index + 0];
130                                 batch[i][provokeFirst ? 1 : 0] = indices[index + 1];
131                                 batch[i][provokeFirst ? 2 : 1] = indices[0];
132
133                                 index += 1;
134                         }
135                         break;
136                 }
137                 default:
138                         ASSERT(false);
139                         return false;
140         }
141
142         return true;
143 }
144
145 DrawCall::DrawCall()
146 {
147         data = (DrawData *)allocate(sizeof(DrawData));
148         data->constants = &constants;
149 }
150
151 DrawCall::~DrawCall()
152 {
153         deallocate(data);
154 }
155
156 Renderer::Renderer(vk::Device *device)
157     : device(device)
158 {
159         VertexProcessor::setRoutineCacheSize(1024);
160         PixelProcessor::setRoutineCacheSize(1024);
161         SetupProcessor::setRoutineCacheSize(1024);
162 }
163
164 Renderer::~Renderer()
165 {
166         drawTickets.take().wait();
167 }
168
169 // Renderer objects have to be mem aligned to the alignment provided in the class declaration
170 void *Renderer::operator new(size_t size)
171 {
172         ASSERT(size == sizeof(Renderer));  // This operator can't be called from a derived class
173         return vk::allocate(sizeof(Renderer), alignof(Renderer), vk::DEVICE_MEMORY, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
174 }
175
176 void Renderer::operator delete(void *mem)
177 {
178         vk::deallocate(mem, vk::DEVICE_MEMORY);
179 }
180
181 void Renderer::draw(const sw::Context *context, VkIndexType indexType, unsigned int count, int baseVertex,
182                     TaskEvents *events, int instanceID, int viewID, void *indexBuffer, const VkExtent3D &framebufferExtent,
183                     PushConstantStorage const &pushConstants, bool update)
184 {
185         if(count == 0) { return; }
186
187         auto id = nextDrawID++;
188         MARL_SCOPED_EVENT("draw %d", id);
189
190 #ifndef NDEBUG
191         {
192                 unsigned int minPrimitives = 1;
193                 unsigned int maxPrimitives = 1 << 21;
194                 if(count < minPrimitives || count > maxPrimitives)
195                 {
196                         return;
197                 }
198         }
199 #endif
200
201         int ms = context->sampleCount;
202
203         if(!context->multiSampleMask)
204         {
205                 return;
206         }
207
208         marl::Pool<sw::DrawCall>::Loan draw;
209         {
210                 MARL_SCOPED_EVENT("drawCallPool.borrow()");
211                 draw = drawCallPool.borrow();
212         }
213         draw->id = id;
214
215         if(update)
216         {
217                 MARL_SCOPED_EVENT("update");
218                 vertexState = VertexProcessor::update(context);
219                 setupState = SetupProcessor::update(context);
220                 pixelState = PixelProcessor::update(context);
221
222                 vertexRoutine = VertexProcessor::routine(vertexState, context->pipelineLayout, context->vertexShader, context->descriptorSets);
223                 setupRoutine = SetupProcessor::routine(setupState);
224                 pixelRoutine = PixelProcessor::routine(pixelState, context->pipelineLayout, context->pixelShader, context->descriptorSets);
225         }
226
227         DrawCall::SetupFunction setupPrimitives = nullptr;
228         unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;
229
230         if(context->isDrawTriangle(false))
231         {
232                 switch(context->polygonMode)
233                 {
234                         case VK_POLYGON_MODE_FILL:
235                                 setupPrimitives = &DrawCall::setupSolidTriangles;
236                                 break;
237                         case VK_POLYGON_MODE_LINE:
238                                 setupPrimitives = &DrawCall::setupWireframeTriangles;
239                                 numPrimitivesPerBatch /= 3;
240                                 break;
241                         case VK_POLYGON_MODE_POINT:
242                                 setupPrimitives = &DrawCall::setupPointTriangles;
243                                 numPrimitivesPerBatch /= 3;
244                                 break;
245                         default:
246                                 UNSUPPORTED("polygon mode: %d", int(context->polygonMode));
247                                 return;
248                 }
249         }
250         else if(context->isDrawLine(false))
251         {
252                 setupPrimitives = &DrawCall::setupLines;
253         }
254         else  // Point primitive topology
255         {
256                 setupPrimitives = &DrawCall::setupPoints;
257         }
258
259         DrawData *data = draw->data;
260         draw->occlusionQuery = occlusionQuery;
261         draw->batchDataPool = &batchDataPool;
262         draw->numPrimitives = count;
263         draw->numPrimitivesPerBatch = numPrimitivesPerBatch;
264         draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch;
265         draw->topology = context->topology;
266         draw->provokingVertexMode = context->provokingVertexMode;
267         draw->indexType = indexType;
268         draw->lineRasterizationMode = context->lineRasterizationMode;
269
270         draw->vertexRoutine = vertexRoutine;
271         draw->setupRoutine = setupRoutine;
272         draw->pixelRoutine = pixelRoutine;
273         draw->setupPrimitives = setupPrimitives;
274         draw->setupState = setupState;
275
276         data->descriptorSets = context->descriptorSets;
277         data->descriptorDynamicOffsets = context->descriptorDynamicOffsets;
278
279         for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
280         {
281                 data->input[i] = context->input[i].buffer;
282                 data->robustnessSize[i] = context->input[i].robustnessSize;
283                 data->stride[i] = context->input[i].vertexStride;
284         }
285
286         data->indices = indexBuffer;
287         data->viewID = viewID;
288         data->instanceID = instanceID;
289         data->baseVertex = baseVertex;
290
291         if(pixelState.stencilActive)
292         {
293                 data->stencil[0].set(context->frontStencil.reference, context->frontStencil.compareMask, context->frontStencil.writeMask);
294                 data->stencil[1].set(context->backStencil.reference, context->backStencil.compareMask, context->backStencil.writeMask);
295         }
296
297         data->lineWidth = context->lineWidth;
298
299         data->factor = factor;
300
301         if(pixelState.alphaToCoverage)
302         {
303                 if(ms == 4)
304                 {
305                         data->a2c0 = float4(0.2f);
306                         data->a2c1 = float4(0.4f);
307                         data->a2c2 = float4(0.6f);
308                         data->a2c3 = float4(0.8f);
309                 }
310                 else if(ms == 2)
311                 {
312                         data->a2c0 = float4(0.25f);
313                         data->a2c1 = float4(0.75f);
314                 }
315                 else
316                         ASSERT(false);
317         }
318
319         if(pixelState.occlusionEnabled)
320         {
321                 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
322                 {
323                         data->occlusion[cluster] = 0;
324                 }
325         }
326
327         // Viewport
328         {
329                 float W = 0.5f * viewport.width;
330                 float H = 0.5f * viewport.height;
331                 float X0 = viewport.x + W;
332                 float Y0 = viewport.y + H;
333                 float N = viewport.minDepth;
334                 float F = viewport.maxDepth;
335                 float Z = F - N;
336                 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
337
338                 if(context->isDrawTriangle(false))
339                 {
340                         N += context->depthBias;
341                 }
342
343                 data->WxF = float4(W * subPixF);
344                 data->HxF = float4(H * subPixF);
345                 data->X0xF = float4(X0 * subPixF - subPixF / 2);
346                 data->Y0xF = float4(Y0 * subPixF - subPixF / 2);
347                 data->halfPixelX = float4(0.5f / W);
348                 data->halfPixelY = float4(0.5f / H);
349                 data->viewportHeight = abs(viewport.height);
350                 data->slopeDepthBias = context->slopeDepthBias;
351                 data->depthRange = Z;
352                 data->depthNear = N;
353         }
354
355         // Target
356         {
357                 for(int index = 0; index < RENDERTARGETS; index++)
358                 {
359                         draw->renderTarget[index] = context->renderTarget[index];
360
361                         if(draw->renderTarget[index])
362                         {
363                                 data->colorBuffer[index] = (unsigned int *)context->renderTarget[index]->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->viewID);
364                                 data->colorPitchB[index] = context->renderTarget[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
365                                 data->colorSliceB[index] = context->renderTarget[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
366                         }
367                 }
368
369                 draw->depthBuffer = context->depthBuffer;
370                 draw->stencilBuffer = context->stencilBuffer;
371
372                 if(draw->depthBuffer)
373                 {
374                         data->depthBuffer = (float *)context->depthBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->viewID);
375                         data->depthPitchB = context->depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
376                         data->depthSliceB = context->depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
377                 }
378
379                 if(draw->stencilBuffer)
380                 {
381                         data->stencilBuffer = (unsigned char *)context->stencilBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->viewID);
382                         data->stencilPitchB = context->stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
383                         data->stencilSliceB = context->stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
384                 }
385         }
386
387         // Scissor
388         {
389                 data->scissorX0 = clamp<int>(scissor.offset.x, 0, framebufferExtent.width);
390                 data->scissorX1 = clamp<int>(scissor.offset.x + scissor.extent.width, 0, framebufferExtent.width);
391                 data->scissorY0 = clamp<int>(scissor.offset.y, 0, framebufferExtent.height);
392                 data->scissorY1 = clamp<int>(scissor.offset.y + scissor.extent.height, 0, framebufferExtent.height);
393         }
394
395         // Push constants
396         {
397                 data->pushConstants = pushConstants;
398         }
399
400         draw->events = events;
401
402         DrawCall::run(draw, &drawTickets, clusterQueues);
403 }
404
405 void DrawCall::setup()
406 {
407         if(occlusionQuery != nullptr)
408         {
409                 occlusionQuery->start();
410         }
411
412         if(events)
413         {
414                 events->start();
415         }
416 }
417
418 void DrawCall::teardown()
419 {
420         if(events)
421         {
422                 events->finish();
423                 events = nullptr;
424         }
425
426         if(occlusionQuery != nullptr)
427         {
428                 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
429                 {
430                         occlusionQuery->add(data->occlusion[cluster]);
431                 }
432                 occlusionQuery->finish();
433         }
434
435         vertexRoutine = {};
436         setupRoutine = {};
437         pixelRoutine = {};
438 }
439
440 void DrawCall::run(const marl::Loan<DrawCall> &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
441 {
442         draw->setup();
443
444         auto const numPrimitives = draw->numPrimitives;
445         auto const numPrimitivesPerBatch = draw->numPrimitivesPerBatch;
446         auto const numBatches = draw->numBatches;
447
448         auto ticket = tickets->take();
449         auto finally = marl::make_shared_finally([draw, ticket] {
450                 MARL_SCOPED_EVENT("FINISH draw %d", draw->id);
451                 draw->teardown();
452                 ticket.done();
453         });
454
455         for(unsigned int batchId = 0; batchId < numBatches; batchId++)
456         {
457                 auto batch = draw->batchDataPool->borrow();
458                 batch->id = batchId;
459                 batch->firstPrimitive = batch->id * numPrimitivesPerBatch;
460                 batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive;
461
462                 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
463                 {
464                         batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take());
465                 }
466
467                 marl::schedule([draw, batch, finally] {
468                         processVertices(draw.get(), batch.get());
469
470                         if(!draw->setupState.rasterizerDiscard)
471                         {
472                                 processPrimitives(draw.get(), batch.get());
473
474                                 if(batch->numVisible > 0)
475                                 {
476                                         processPixels(draw, batch, finally);
477                                         return;
478                                 }
479                         }
480
481                         for(int cluster = 0; cluster < MaxClusterCount; cluster++)
482                         {
483                                 batch->clusterTickets[cluster].done();
484                         }
485                 });
486         }
487 }
488
489 void DrawCall::processVertices(DrawCall *draw, BatchData *batch)
490 {
491         MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
492
493         unsigned int triangleIndices[MaxBatchSize + 1][3];  // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size.
494         {
495                 MARL_SCOPED_EVENT("processPrimitiveVertices");
496                 processPrimitiveVertices(
497                     triangleIndices,
498                     draw->data->indices,
499                     draw->indexType,
500                     batch->firstPrimitive,
501                     batch->numPrimitives,
502                     draw->topology,
503                     draw->provokingVertexMode);
504         }
505
506         auto &vertexTask = batch->vertexTask;
507         vertexTask.primitiveStart = batch->firstPrimitive;
508         // We're only using batch compaction for points, not lines
509         vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3);
510         if(vertexTask.vertexCache.drawCall != draw->id)
511         {
512                 vertexTask.vertexCache.clear();
513                 vertexTask.vertexCache.drawCall = draw->id;
514         }
515
516         draw->vertexRoutine(&batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data);
517 }
518
519 void DrawCall::processPrimitives(DrawCall *draw, BatchData *batch)
520 {
521         MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
522         auto triangles = &batch->triangles[0];
523         auto primitives = &batch->primitives[0];
524         batch->numVisible = draw->setupPrimitives(triangles, primitives, draw, batch->numPrimitives);
525 }
526
527 void DrawCall::processPixels(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
528 {
529         struct Data
530         {
531                 Data(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
532                     : draw(draw)
533                     , batch(batch)
534                     , finally(finally)
535                 {}
536                 marl::Loan<DrawCall> draw;
537                 marl::Loan<BatchData> batch;
538                 std::shared_ptr<marl::Finally> finally;
539         };
540         auto data = std::make_shared<Data>(draw, batch, finally);
541         for(int cluster = 0; cluster < MaxClusterCount; cluster++)
542         {
543                 batch->clusterTickets[cluster].onCall([data, cluster] {
544                         auto &draw = data->draw;
545                         auto &batch = data->batch;
546                         MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
547                         draw->pixelRoutine(&batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data);
548                         batch->clusterTickets[cluster].done();
549                 });
550         }
551 }
552
553 void Renderer::synchronize()
554 {
555         MARL_SCOPED_EVENT("synchronize");
556         auto ticket = drawTickets.take();
557         ticket.wait();
558         device->updateSamplingRoutineConstCache();
559         ticket.done();
560 }
561
562 void DrawCall::processPrimitiveVertices(
563     unsigned int triangleIndicesOut[MaxBatchSize + 1][3],
564     const void *primitiveIndices,
565     VkIndexType indexType,
566     unsigned int start,
567     unsigned int triangleCount,
568     VkPrimitiveTopology topology,
569     VkProvokingVertexModeEXT provokingVertexMode)
570 {
571         if(!primitiveIndices)
572         {
573                 struct LinearIndex
574                 {
575                         unsigned int operator[](unsigned int i) { return i; }
576                 };
577
578                 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount))
579                 {
580                         return;
581                 }
582         }
583         else
584         {
585                 switch(indexType)
586                 {
587                         case VK_INDEX_TYPE_UINT16:
588                                 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint16_t *>(primitiveIndices), start, triangleCount))
589                                 {
590                                         return;
591                                 }
592                                 break;
593                         case VK_INDEX_TYPE_UINT32:
594                                 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint32_t *>(primitiveIndices), start, triangleCount))
595                                 {
596                                         return;
597                                 }
598                                 break;
599                                 break;
600                         default:
601                                 ASSERT(false);
602                                 return;
603                 }
604         }
605
606         // setBatchIndices() takes care of the point case, since it's different due to the compaction
607         if(topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
608         {
609                 // Repeat the last index to allow for SIMD width overrun.
610                 triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2];
611                 triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2];
612                 triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2];
613         }
614 }
615
616 int DrawCall::setupSolidTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
617 {
618         auto &state = drawCall->setupState;
619
620         int ms = state.multiSampleCount;
621         const DrawData *data = drawCall->data;
622         int visible = 0;
623
624         for(int i = 0; i < count; i++, triangles++)
625         {
626                 Vertex &v0 = triangles->v0;
627                 Vertex &v1 = triangles->v1;
628                 Vertex &v2 = triangles->v2;
629
630                 Polygon polygon(&v0.position, &v1.position, &v2.position);
631
632                 if((v0.cullMask | v1.cullMask | v2.cullMask) == 0)
633                 {
634                         continue;
635                 }
636
637                 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE)
638                 {
639                         continue;
640                 }
641
642                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
643                 if(clipFlagsOr != Clipper::CLIP_FINITE)
644                 {
645                         if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
646                         {
647                                 continue;
648                         }
649                 }
650
651                 if(drawCall->setupRoutine(primitives, triangles, &polygon, data))
652                 {
653                         primitives += ms;
654                         visible++;
655                 }
656         }
657
658         return visible;
659 }
660
661 int DrawCall::setupWireframeTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
662 {
663         auto &state = drawCall->setupState;
664
665         int ms = state.multiSampleCount;
666         int visible = 0;
667
668         for(int i = 0; i < count; i++)
669         {
670                 const Vertex &v0 = triangles[i].v0;
671                 const Vertex &v1 = triangles[i].v1;
672                 const Vertex &v2 = triangles[i].v2;
673
674                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
675                           (v0.x * v2.y - v0.y * v2.x) * v1.w +
676                           (v2.x * v1.y - v1.x * v2.y) * v0.w;
677
678                 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
679                 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
680                 {
681                         if(frontFacing) continue;
682                 }
683                 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
684                 {
685                         if(!frontFacing) continue;
686                 }
687
688                 Triangle lines[3];
689                 lines[0].v0 = v0;
690                 lines[0].v1 = v1;
691                 lines[1].v0 = v1;
692                 lines[1].v1 = v2;
693                 lines[2].v0 = v2;
694                 lines[2].v1 = v0;
695
696                 for(int i = 0; i < 3; i++)
697                 {
698                         if(setupLine(*primitives, lines[i], *drawCall))
699                         {
700                                 primitives += ms;
701                                 visible++;
702                         }
703                 }
704         }
705
706         return visible;
707 }
708
709 int DrawCall::setupPointTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
710 {
711         auto &state = drawCall->setupState;
712
713         int ms = state.multiSampleCount;
714         int visible = 0;
715
716         for(int i = 0; i < count; i++)
717         {
718                 const Vertex &v0 = triangles[i].v0;
719                 const Vertex &v1 = triangles[i].v1;
720                 const Vertex &v2 = triangles[i].v2;
721
722                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
723                           (v0.x * v2.y - v0.y * v2.x) * v1.w +
724                           (v2.x * v1.y - v1.x * v2.y) * v0.w;
725
726                 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
727                 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
728                 {
729                         if(frontFacing) continue;
730                 }
731                 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
732                 {
733                         if(!frontFacing) continue;
734                 }
735
736                 Triangle points[3];
737                 points[0].v0 = v0;
738                 points[1].v0 = v1;
739                 points[2].v0 = v2;
740
741                 for(int i = 0; i < 3; i++)
742                 {
743                         if(setupPoint(*primitives, points[i], *drawCall))
744                         {
745                                 primitives += ms;
746                                 visible++;
747                         }
748                 }
749         }
750
751         return visible;
752 }
753
754 int DrawCall::setupLines(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
755 {
756         auto &state = drawCall->setupState;
757
758         int visible = 0;
759         int ms = state.multiSampleCount;
760
761         for(int i = 0; i < count; i++)
762         {
763                 if(setupLine(*primitives, *triangles, *drawCall))
764                 {
765                         primitives += ms;
766                         visible++;
767                 }
768
769                 triangles++;
770         }
771
772         return visible;
773 }
774
775 int DrawCall::setupPoints(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
776 {
777         auto &state = drawCall->setupState;
778
779         int visible = 0;
780         int ms = state.multiSampleCount;
781
782         for(int i = 0; i < count; i++)
783         {
784                 if(setupPoint(*primitives, *triangles, *drawCall))
785                 {
786                         primitives += ms;
787                         visible++;
788                 }
789
790                 triangles++;
791         }
792
793         return visible;
794 }
795
796 bool DrawCall::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
797 {
798         const DrawData &data = *draw.data;
799
800         float lineWidth = data.lineWidth;
801
802         Vertex &v0 = triangle.v0;
803         Vertex &v1 = triangle.v1;
804
805         if((v0.cullMask | v1.cullMask) == 0)
806         {
807                 return false;
808         }
809
810         const float4 &P0 = v0.position;
811         const float4 &P1 = v1.position;
812
813         if(P0.w <= 0 && P1.w <= 0)
814         {
815                 return false;
816         }
817
818         constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
819
820         const float W = data.WxF[0] * (1.0f / subPixF);
821         const float H = data.HxF[0] * (1.0f / subPixF);
822
823         float dx = W * (P1.x / P1.w - P0.x / P0.w);
824         float dy = H * (P1.y / P1.w - P0.y / P0.w);
825
826         if(dx == 0 && dy == 0)
827         {
828                 return false;
829         }
830
831         if(draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
832         {
833                 // Rectangle centered on the line segment
834
835                 float4 P[4];
836                 int C[4];
837
838                 P[0] = P0;
839                 P[1] = P1;
840                 P[2] = P1;
841                 P[3] = P0;
842
843                 float scale = lineWidth * 0.5f / sqrt(dx * dx + dy * dy);
844
845                 dx *= scale;
846                 dy *= scale;
847
848                 float dx0h = dx * P0.w / H;
849                 float dy0w = dy * P0.w / W;
850
851                 float dx1h = dx * P1.w / H;
852                 float dy1w = dy * P1.w / W;
853
854                 P[0].x += -dy0w;
855                 P[0].y += +dx0h;
856                 C[0] = Clipper::ComputeClipFlags(P[0]);
857
858                 P[1].x += -dy1w;
859                 P[1].y += +dx1h;
860                 C[1] = Clipper::ComputeClipFlags(P[1]);
861
862                 P[2].x += +dy1w;
863                 P[2].y += -dx1h;
864                 C[2] = Clipper::ComputeClipFlags(P[2]);
865
866                 P[3].x += +dy0w;
867                 P[3].y += -dx0h;
868                 C[3] = Clipper::ComputeClipFlags(P[3]);
869
870                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
871                 {
872                         Polygon polygon(P, 4);
873
874                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
875
876                         if(clipFlagsOr != Clipper::CLIP_FINITE)
877                         {
878                                 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
879                                 {
880                                         return false;
881                                 }
882                         }
883
884                         return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
885                 }
886         }
887         else if(false)  // TODO(b/80135519): Deprecate
888         {
889                 // Connecting diamonds polygon
890                 // This shape satisfies the diamond test convention, except for the exit rule part.
891                 // Line segments with overlapping endpoints have duplicate fragments.
892                 // The ideal algorithm requires half-open line rasterization (b/80135519).
893
894                 float4 P[8];
895                 int C[8];
896
897                 P[0] = P0;
898                 P[1] = P0;
899                 P[2] = P0;
900                 P[3] = P0;
901                 P[4] = P1;
902                 P[5] = P1;
903                 P[6] = P1;
904                 P[7] = P1;
905
906                 float dx0 = lineWidth * 0.5f * P0.w / W;
907                 float dy0 = lineWidth * 0.5f * P0.w / H;
908
909                 float dx1 = lineWidth * 0.5f * P1.w / W;
910                 float dy1 = lineWidth * 0.5f * P1.w / H;
911
912                 P[0].x += -dx0;
913                 C[0] = Clipper::ComputeClipFlags(P[0]);
914
915                 P[1].y += +dy0;
916                 C[1] = Clipper::ComputeClipFlags(P[1]);
917
918                 P[2].x += +dx0;
919                 C[2] = Clipper::ComputeClipFlags(P[2]);
920
921                 P[3].y += -dy0;
922                 C[3] = Clipper::ComputeClipFlags(P[3]);
923
924                 P[4].x += -dx1;
925                 C[4] = Clipper::ComputeClipFlags(P[4]);
926
927                 P[5].y += +dy1;
928                 C[5] = Clipper::ComputeClipFlags(P[5]);
929
930                 P[6].x += +dx1;
931                 C[6] = Clipper::ComputeClipFlags(P[6]);
932
933                 P[7].y += -dy1;
934                 C[7] = Clipper::ComputeClipFlags(P[7]);
935
936                 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
937                 {
938                         float4 L[6];
939
940                         if(dx > -dy)
941                         {
942                                 if(dx > dy)  // Right
943                                 {
944                                         L[0] = P[0];
945                                         L[1] = P[1];
946                                         L[2] = P[5];
947                                         L[3] = P[6];
948                                         L[4] = P[7];
949                                         L[5] = P[3];
950                                 }
951                                 else  // Down
952                                 {
953                                         L[0] = P[0];
954                                         L[1] = P[4];
955                                         L[2] = P[5];
956                                         L[3] = P[6];
957                                         L[4] = P[2];
958                                         L[5] = P[3];
959                                 }
960                         }
961                         else
962                         {
963                                 if(dx > dy)  // Up
964                                 {
965                                         L[0] = P[0];
966                                         L[1] = P[1];
967                                         L[2] = P[2];
968                                         L[3] = P[6];
969                                         L[4] = P[7];
970                                         L[5] = P[4];
971                                 }
972                                 else  // Left
973                                 {
974                                         L[0] = P[1];
975                                         L[1] = P[2];
976                                         L[2] = P[3];
977                                         L[3] = P[7];
978                                         L[4] = P[4];
979                                         L[5] = P[5];
980                                 }
981                         }
982
983                         Polygon polygon(L, 6);
984
985                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7];
986
987                         if(clipFlagsOr != Clipper::CLIP_FINITE)
988                         {
989                                 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
990                                 {
991                                         return false;
992                                 }
993                         }
994
995                         return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
996                 }
997         }
998         else
999         {
1000                 // Parallelogram approximating Bresenham line
1001                 // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the
1002                 // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum
1003                 // requirements for Bresenham line segment rasterization.
1004
1005                 float4 P[8];
1006                 P[0] = P0;
1007                 P[1] = P0;
1008                 P[2] = P0;
1009                 P[3] = P0;
1010                 P[4] = P1;
1011                 P[5] = P1;
1012                 P[6] = P1;
1013                 P[7] = P1;
1014
1015                 float dx0 = lineWidth * 0.5f * P0.w / W;
1016                 float dy0 = lineWidth * 0.5f * P0.w / H;
1017
1018                 float dx1 = lineWidth * 0.5f * P1.w / W;
1019                 float dy1 = lineWidth * 0.5f * P1.w / H;
1020
1021                 P[0].x += -dx0;
1022                 P[1].y += +dy0;
1023                 P[2].x += +dx0;
1024                 P[3].y += -dy0;
1025                 P[4].x += -dx1;
1026                 P[5].y += +dy1;
1027                 P[6].x += +dx1;
1028                 P[7].y += -dy1;
1029
1030                 float4 L[4];
1031
1032                 if(dx > -dy)
1033                 {
1034                         if(dx > dy)  // Right
1035                         {
1036                                 L[0] = P[1];
1037                                 L[1] = P[5];
1038                                 L[2] = P[7];
1039                                 L[3] = P[3];
1040                         }
1041                         else  // Down
1042                         {
1043                                 L[0] = P[0];
1044                                 L[1] = P[4];
1045                                 L[2] = P[6];
1046                                 L[3] = P[2];
1047                         }
1048                 }
1049                 else
1050                 {
1051                         if(dx > dy)  // Up
1052                         {
1053                                 L[0] = P[0];
1054                                 L[1] = P[2];
1055                                 L[2] = P[6];
1056                                 L[3] = P[4];
1057                         }
1058                         else  // Left
1059                         {
1060                                 L[0] = P[1];
1061                                 L[1] = P[3];
1062                                 L[2] = P[7];
1063                                 L[3] = P[5];
1064                         }
1065                 }
1066
1067                 int C0 = Clipper::ComputeClipFlags(L[0]);
1068                 int C1 = Clipper::ComputeClipFlags(L[1]);
1069                 int C2 = Clipper::ComputeClipFlags(L[2]);
1070                 int C3 = Clipper::ComputeClipFlags(L[3]);
1071
1072                 if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE)
1073                 {
1074                         Polygon polygon(L, 4);
1075
1076                         int clipFlagsOr = C0 | C1 | C2 | C3;
1077
1078                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1079                         {
1080                                 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1081                                 {
1082                                         return false;
1083                                 }
1084                         }
1085
1086                         return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
1087                 }
1088         }
1089
1090         return false;
1091 }
1092
1093 bool DrawCall::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1094 {
1095         const DrawData &data = *draw.data;
1096
1097         Vertex &v = triangle.v0;
1098
1099         if(v.cullMask == 0)
1100         {
1101                 return false;
1102         }
1103
1104         float pSize = v.pointSize;
1105
1106         pSize = clamp(pSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
1107
1108         float4 P[4];
1109         int C[4];
1110
1111         P[0] = v.position;
1112         P[1] = v.position;
1113         P[2] = v.position;
1114         P[3] = v.position;
1115
1116         const float X = pSize * P[0].w * data.halfPixelX[0];
1117         const float Y = pSize * P[0].w * data.halfPixelY[0];
1118
1119         P[0].x -= X;
1120         P[0].y += Y;
1121         C[0] = Clipper::ComputeClipFlags(P[0]);
1122
1123         P[1].x += X;
1124         P[1].y += Y;
1125         C[1] = Clipper::ComputeClipFlags(P[1]);
1126
1127         P[2].x += X;
1128         P[2].y -= Y;
1129         C[2] = Clipper::ComputeClipFlags(P[2]);
1130
1131         P[3].x -= X;
1132         P[3].y -= Y;
1133         C[3] = Clipper::ComputeClipFlags(P[3]);
1134
1135         Polygon polygon(P, 4);
1136
1137         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1138         {
1139                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
1140
1141                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1142                 {
1143                         if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1144                         {
1145                                 return false;
1146                         }
1147                 }
1148
1149                 primitive.pointSizeInv = 1.0f / pSize;
1150
1151                 return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
1152         }
1153
1154         return false;
1155 }
1156
1157 void Renderer::addQuery(vk::Query *query)
1158 {
1159         ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
1160         ASSERT(!occlusionQuery);
1161
1162         occlusionQuery = query;
1163 }
1164
1165 void Renderer::removeQuery(vk::Query *query)
1166 {
1167         ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
1168         ASSERT(occlusionQuery == query);
1169
1170         occlusionQuery = nullptr;
1171 }
1172
1173 // TODO(b/137740918): Optimize instancing to use a single draw call.
1174 void Renderer::advanceInstanceAttributes(Stream *inputs)
1175 {
1176         for(uint32_t i = 0; i < vk::MAX_VERTEX_INPUT_BINDINGS; i++)
1177         {
1178                 auto &attrib = inputs[i];
1179                 if((attrib.format != VK_FORMAT_UNDEFINED) && attrib.instanceStride && (attrib.instanceStride < attrib.robustnessSize))
1180                 {
1181                         // Under the casts: attrib.buffer += attrib.instanceStride
1182                         attrib.buffer = (void const *)((uintptr_t)attrib.buffer + attrib.instanceStride);
1183                         attrib.robustnessSize -= attrib.instanceStride;
1184                 }
1185         }
1186 }
1187
1188 void Renderer::setViewport(const VkViewport &viewport)
1189 {
1190         this->viewport = viewport;
1191 }
1192
1193 void Renderer::setScissor(const VkRect2D &scissor)
1194 {
1195         this->scissor = scissor;
1196 }
1197
1198 }  // namespace sw