OSDN Git Service

util/vulkan: Move Vulkan utilities to src/vulkan/util
[android-x86/external-mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "gfx9d.h"
46 #include "util/debug.h"
47
48 static int
49 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
50 {
51         uint32_t mesa_timestamp, llvm_timestamp;
52         uint16_t f = family;
53         memset(uuid, 0, VK_UUID_SIZE);
54         if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
55             !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
56                 return -1;
57
58         memcpy(uuid, &mesa_timestamp, 4);
59         memcpy((char*)uuid + 4, &llvm_timestamp, 4);
60         memcpy((char*)uuid + 8, &f, 2);
61         snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
62         return 0;
63 }
64
65 static void
66 radv_get_device_uuid(drmDevicePtr device, void *uuid) {
67         memset(uuid, 0, VK_UUID_SIZE);
68         memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2);
69         memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1);
70         memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1);
71         memcpy((char*)uuid + 4, &device->businfo.pci->func, 1);
72 }
73
/* Instance-level extensions advertised by radv.  The window-system surface
 * extensions are compiled in only when the corresponding platform support
 * was enabled at build time. */
static const VkExtensionProperties instance_extensions[] = {
	{
		.extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
		.specVersion = 25,
	},
#ifdef VK_USE_PLATFORM_XCB_KHR
	{
		.extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
		.specVersion = 6,
	},
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
	{
		.extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
		.specVersion = 6,
	},
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
	{
		.extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
		.specVersion = 5,
	},
#endif
	{
		.extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHX_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
		.specVersion = 1,
	},
};
106
/* Device-level extensions common to all supported GPUs; registered for
 * every physical device in radv_physical_device_init(). */
static const VkExtensionProperties common_device_extensions[] = {
	{
		.extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
		.specVersion = 68,
	},
	{
		.extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHX_EXTERNAL_MEMORY_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHX_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
		.specVersion = 1,
	},
};
153
154 static VkResult
155 radv_extensions_register(struct radv_instance *instance,
156                         struct radv_extensions *extensions,
157                         const VkExtensionProperties *new_ext,
158                         uint32_t num_ext)
159 {
160         size_t new_size;
161         VkExtensionProperties *new_ptr;
162
163         assert(new_ext && num_ext > 0);
164
165         if (!new_ext)
166                 return VK_ERROR_INITIALIZATION_FAILED;
167
168         new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
169         new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
170                                 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
171
172         /* Old array continues to be valid, update nothing */
173         if (!new_ptr)
174                 return VK_ERROR_OUT_OF_HOST_MEMORY;
175
176         memcpy(&new_ptr[extensions->num_ext], new_ext,
177                 num_ext * sizeof(VkExtensionProperties));
178         extensions->ext_array = new_ptr;
179         extensions->num_ext += num_ext;
180
181         return VK_SUCCESS;
182 }
183
184 static void
185 radv_extensions_finish(struct radv_instance *instance,
186                         struct radv_extensions *extensions)
187 {
188         assert(extensions);
189
190         if (!extensions)
191                 radv_loge("Attemted to free invalid extension struct\n");
192
193         if (extensions->ext_array)
194                 vk_free(&instance->alloc, extensions->ext_array);
195 }
196
197 static bool
198 is_extension_enabled(const VkExtensionProperties *extensions,
199                         size_t num_ext,
200                         const char *name)
201 {
202         assert(extensions && name);
203
204         for (uint32_t i = 0; i < num_ext; i++) {
205                 if (strcmp(name, extensions[i].extensionName) == 0)
206                         return true;
207         }
208
209         return false;
210 }
211
/* Map a radeon_family enum to the marketing name reported in
 * VkPhysicalDeviceProperties::deviceName.  Unknown families get a
 * generic fallback string rather than failing. */
static const char *
get_chip_name(enum radeon_family family)
{
	switch (family) {
	case CHIP_TAHITI: return "AMD RADV TAHITI";
	case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
	case CHIP_VERDE: return "AMD RADV CAPE VERDE";
	case CHIP_OLAND: return "AMD RADV OLAND";
	case CHIP_HAINAN: return "AMD RADV HAINAN";
	case CHIP_BONAIRE: return "AMD RADV BONAIRE";
	case CHIP_KAVERI: return "AMD RADV KAVERI";
	case CHIP_KABINI: return "AMD RADV KABINI";
	case CHIP_HAWAII: return "AMD RADV HAWAII";
	case CHIP_MULLINS: return "AMD RADV MULLINS";
	case CHIP_TONGA: return "AMD RADV TONGA";
	case CHIP_ICELAND: return "AMD RADV ICELAND";
	case CHIP_CARRIZO: return "AMD RADV CARRIZO";
	case CHIP_FIJI: return "AMD RADV FIJI";
	case CHIP_POLARIS10: return "AMD RADV POLARIS10";
	case CHIP_POLARIS11: return "AMD RADV POLARIS11";
	case CHIP_POLARIS12: return "AMD RADV POLARIS12";
	case CHIP_STONEY: return "AMD RADV STONEY";
	case CHIP_VEGA10: return "AMD RADV VEGA";
	case CHIP_RAVEN: return "AMD RADV RAVEN";
	default: return "AMD RADV unknown";
	}
}
239
240 static VkResult
241 radv_physical_device_init(struct radv_physical_device *device,
242                           struct radv_instance *instance,
243                           drmDevicePtr drm_device)
244 {
245         const char *path = drm_device->nodes[DRM_NODE_RENDER];
246         VkResult result;
247         drmVersionPtr version;
248         int fd;
249
250         fd = open(path, O_RDWR | O_CLOEXEC);
251         if (fd < 0)
252                 return VK_ERROR_INCOMPATIBLE_DRIVER;
253
254         version = drmGetVersion(fd);
255         if (!version) {
256                 close(fd);
257                 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
258                                  "failed to get version %s: %m", path);
259         }
260
261         if (strcmp(version->name, "amdgpu")) {
262                 drmFreeVersion(version);
263                 close(fd);
264                 return VK_ERROR_INCOMPATIBLE_DRIVER;
265         }
266         drmFreeVersion(version);
267
268         device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
269         device->instance = instance;
270         assert(strlen(path) < ARRAY_SIZE(device->path));
271         strncpy(device->path, path, ARRAY_SIZE(device->path));
272
273         device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
274         if (!device->ws) {
275                 result = VK_ERROR_INCOMPATIBLE_DRIVER;
276                 goto fail;
277         }
278
279         device->local_fd = fd;
280         device->ws->query_info(device->ws, &device->rad_info);
281         result = radv_init_wsi(device);
282         if (result != VK_SUCCESS) {
283                 device->ws->destroy(device->ws);
284                 goto fail;
285         }
286
287         if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
288                 radv_finish_wsi(device);
289                 device->ws->destroy(device->ws);
290                 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
291                                    "cannot generate UUID");
292                 goto fail;
293         }
294
295         result = radv_extensions_register(instance,
296                                         &device->extensions,
297                                         common_device_extensions,
298                                         ARRAY_SIZE(common_device_extensions));
299         if (result != VK_SUCCESS)
300                 goto fail;
301
302         fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
303         device->name = get_chip_name(device->rad_info.family);
304
305         radv_get_device_uuid(drm_device, device->device_uuid);
306
307         if (device->rad_info.family == CHIP_STONEY ||
308             device->rad_info.chip_class >= GFX9) {
309                 device->has_rbplus = true;
310                 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
311         }
312
313         return VK_SUCCESS;
314
315 fail:
316         close(fd);
317         return result;
318 }
319
/* Tear down a physical device in reverse initialization order:
 * extensions, WSI, winsys, then the render-node fd opened in
 * radv_physical_device_init(). */
static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_extensions_finish(device->instance, &device->extensions);
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	close(device->local_fd);
}
328
329 static void *
330 default_alloc_func(void *pUserData, size_t size, size_t align,
331                    VkSystemAllocationScope allocationScope)
332 {
333         return malloc(size);
334 }
335
336 static void *
337 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
338                      size_t align, VkSystemAllocationScope allocationScope)
339 {
340         return realloc(pOriginal, size);
341 }
342
/* Fallback VkAllocationCallbacks free: plain free. */
static void
default_free_func(void *pUserData, void *pMemory)
{
	(void)pUserData;
	free(pMemory);
}
348
/* Allocator used when the application passes no VkAllocationCallbacks;
 * simply forwards to malloc/realloc/free. */
static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};
355
/* Comma-separated flags recognized in the RADV_DEBUG environment variable
 * (parsed by parse_debug_string in radv_CreateInstance).  The {NULL, 0}
 * entry terminates the table. */
static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{NULL, 0}
};
369
370 VkResult radv_CreateInstance(
371         const VkInstanceCreateInfo*                 pCreateInfo,
372         const VkAllocationCallbacks*                pAllocator,
373         VkInstance*                                 pInstance)
374 {
375         struct radv_instance *instance;
376
377         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
378
379         uint32_t client_version;
380         if (pCreateInfo->pApplicationInfo &&
381             pCreateInfo->pApplicationInfo->apiVersion != 0) {
382                 client_version = pCreateInfo->pApplicationInfo->apiVersion;
383         } else {
384                 client_version = VK_MAKE_VERSION(1, 0, 0);
385         }
386
387         if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
388             client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
389                 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
390                                  "Client requested version %d.%d.%d",
391                                  VK_VERSION_MAJOR(client_version),
392                                  VK_VERSION_MINOR(client_version),
393                                  VK_VERSION_PATCH(client_version));
394         }
395
396         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
397                 if (!is_extension_enabled(instance_extensions,
398                                         ARRAY_SIZE(instance_extensions),
399                                         pCreateInfo->ppEnabledExtensionNames[i]))
400                         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
401         }
402
403         instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
404                                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
405         if (!instance)
406                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
407
408         memset(instance, 0, sizeof(*instance));
409
410         instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
411
412         if (pAllocator)
413                 instance->alloc = *pAllocator;
414         else
415                 instance->alloc = default_alloc;
416
417         instance->apiVersion = client_version;
418         instance->physicalDeviceCount = -1;
419
420         _mesa_locale_init();
421
422         VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
423
424         instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
425                                                    radv_debug_options);
426
427         *pInstance = radv_instance_to_handle(instance);
428
429         return VK_SUCCESS;
430 }
431
432 void radv_DestroyInstance(
433         VkInstance                                  _instance,
434         const VkAllocationCallbacks*                pAllocator)
435 {
436         RADV_FROM_HANDLE(radv_instance, instance, _instance);
437
438         if (!instance)
439                 return;
440
441         for (int i = 0; i < instance->physicalDeviceCount; ++i) {
442                 radv_physical_device_finish(instance->physicalDevices + i);
443         }
444
445         VG(VALGRIND_DESTROY_MEMPOOL(instance));
446
447         _mesa_locale_fini();
448
449         vk_free(&instance->alloc, instance);
450 }
451
/* Scan the system's DRM devices and initialize a radv physical device for
 * every AMD (vendor 0x1002) PCI GPU with a render node.
 *
 * Returns VK_SUCCESS once at least one device initialized,
 * VK_ERROR_INCOMPATIBLE_DRIVER when none did, or the first hard error
 * from radv_physical_device_init. */
static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices ? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
	if (max_devices < 1)
		return VK_ERROR_INCOMPATIBLE_DRIVER;

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		/* Only PCI AMD devices with a render node are candidates. */
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == 0x1002) {

			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance,
							   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				/* Hard failure: stop probing and report it. */
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}
485
486 VkResult radv_EnumeratePhysicalDevices(
487         VkInstance                                  _instance,
488         uint32_t*                                   pPhysicalDeviceCount,
489         VkPhysicalDevice*                           pPhysicalDevices)
490 {
491         RADV_FROM_HANDLE(radv_instance, instance, _instance);
492         VkResult result;
493
494         if (instance->physicalDeviceCount < 0) {
495                 result = radv_enumerate_devices(instance);
496                 if (result != VK_SUCCESS &&
497                     result != VK_ERROR_INCOMPATIBLE_DRIVER)
498                         return result;
499         }
500
501         if (!pPhysicalDevices) {
502                 *pPhysicalDeviceCount = instance->physicalDeviceCount;
503         } else {
504                 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
505                 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
506                         pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
507         }
508
509         return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
510                                                                      : VK_SUCCESS;
511 }
512
/* vkGetPhysicalDeviceFeatures entry point: report the fixed feature set
 * supported by radv.  Most features are hard-coded; the only per-chip
 * variation visible here is that geometry and tessellation shaders are
 * disabled on GFX9 and newer. */
void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	bool is_gfx9 = pdevice->rad_info.chip_class >= GFX9;
	/* The compound literal below assigns every member, so this memset is
	 * belt-and-braces. */
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess                       = true,
		.fullDrawIndexUint32                      = true,
		.imageCubeArray                           = true,
		.independentBlend                         = true,
		/* Geometry/tessellation not yet supported on GFX9. */
		.geometryShader                           = !is_gfx9,
		.tessellationShader                       = !is_gfx9,
		.sampleRateShading                        = false,
		.dualSrcBlend                             = true,
		.logicOp                                  = true,
		.multiDrawIndirect                        = true,
		.drawIndirectFirstInstance                = true,
		.depthClamp                               = true,
		.depthBiasClamp                           = true,
		.fillModeNonSolid                         = true,
		.depthBounds                              = true,
		.wideLines                                = true,
		.largePoints                              = true,
		.alphaToOne                               = true,
		.multiViewport                            = true,
		.samplerAnisotropy                        = true,
		.textureCompressionETC2                   = false,
		.textureCompressionASTC_LDR               = false,
		.textureCompressionBC                     = true,
		.occlusionQueryPrecise                    = true,
		.pipelineStatisticsQuery                  = true,
		.vertexPipelineStoresAndAtomics           = true,
		.fragmentStoresAndAtomics                 = true,
		.shaderTessellationAndGeometryPointSize   = true,
		.shaderImageGatherExtended                = true,
		.shaderStorageImageExtendedFormats        = true,
		.shaderStorageImageMultisample            = false,
		.shaderUniformBufferArrayDynamicIndexing  = true,
		.shaderSampledImageArrayDynamicIndexing   = true,
		.shaderStorageBufferArrayDynamicIndexing  = true,
		.shaderStorageImageArrayDynamicIndexing   = true,
		.shaderStorageImageReadWithoutFormat      = true,
		.shaderStorageImageWriteWithoutFormat     = true,
		.shaderClipDistance                       = true,
		.shaderCullDistance                       = true,
		.shaderFloat64                            = true,
		.shaderInt64                              = false,
		.shaderInt16                              = false,
		.sparseBinding                            = true,
		.variableMultisampleRate                  = true,
		.inheritedQueries                         = true,
	};
}
569
570 void radv_GetPhysicalDeviceFeatures2KHR(
571         VkPhysicalDevice                            physicalDevice,
572         VkPhysicalDeviceFeatures2KHR               *pFeatures)
573 {
574         return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
575 }
576
577 static uint32_t radv_get_driver_version()
578 {
579         const char *minor_string = strchr(VERSION, '.');
580         const char *patch_string = minor_string ? strchr(minor_string + 1, ','): NULL;
581         int major = atoi(VERSION);
582         int minor = minor_string ? atoi(minor_string + 1) : 0;
583         int patch = patch_string ? atoi(patch_string + 1) : 0;
584         if (strstr(VERSION, "devel")) {
585                 if (patch == 0) {
586                         patch = 99;
587                         if (minor == 0) {
588                                 minor = 99;
589                                 --major;
590                         } else
591                                 --minor;
592                 } else
593                         --patch;
594         }
595         uint32_t version = VK_MAKE_VERSION(major, minor, patch);
596         return version;
597 }
598
599 void radv_GetPhysicalDeviceProperties(
600         VkPhysicalDevice                            physicalDevice,
601         VkPhysicalDeviceProperties*                 pProperties)
602 {
603         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
604         VkSampleCountFlags sample_counts = 0xf;
605
606         /* make sure that the entire descriptor set is addressable with a signed
607          * 32-bit int. So the sum of all limits scaled by descriptor size has to
608          * be at most 2 GiB. the combined image & samples object count as one of
609          * both. This limit is for the pipeline layout, not for the set layout, but
610          * there is no set limit, so we just set a pipeline limit. I don't think
611          * any app is going to hit this soon. */
612         size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
613                   (32 /* uniform buffer, 32 due to potential space wasted on alignement */ +
614                    32 /* storage buffer, 32 due to potential space wasted on alignement */ +
615                    32 /* sampler, largest when combined with image */ +
616                    64 /* sampled image */ +
617                    64 /* storage image */);
618
619         VkPhysicalDeviceLimits limits = {
620                 .maxImageDimension1D                      = (1 << 14),
621                 .maxImageDimension2D                      = (1 << 14),
622                 .maxImageDimension3D                      = (1 << 11),
623                 .maxImageDimensionCube                    = (1 << 14),
624                 .maxImageArrayLayers                      = (1 << 11),
625                 .maxTexelBufferElements                   = 128 * 1024 * 1024,
626                 .maxUniformBufferRange                    = UINT32_MAX,
627                 .maxStorageBufferRange                    = UINT32_MAX,
628                 .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
629                 .maxMemoryAllocationCount                 = UINT32_MAX,
630                 .maxSamplerAllocationCount                = 64 * 1024,
631                 .bufferImageGranularity                   = 64, /* A cache line */
632                 .sparseAddressSpaceSize                   = 0xffffffffu, /* buffer max size */
633                 .maxBoundDescriptorSets                   = MAX_SETS,
634                 .maxPerStageDescriptorSamplers            = max_descriptor_set_size,
635                 .maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
636                 .maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
637                 .maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
638                 .maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
639                 .maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
640                 .maxPerStageResources                     = max_descriptor_set_size,
641                 .maxDescriptorSetSamplers                 = max_descriptor_set_size,
642                 .maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
643                 .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
644                 .maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
645                 .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
646                 .maxDescriptorSetSampledImages            = max_descriptor_set_size,
647                 .maxDescriptorSetStorageImages            = max_descriptor_set_size,
648                 .maxDescriptorSetInputAttachments         = max_descriptor_set_size,
649                 .maxVertexInputAttributes                 = 32,
650                 .maxVertexInputBindings                   = 32,
651                 .maxVertexInputAttributeOffset            = 2047,
652                 .maxVertexInputBindingStride              = 2048,
653                 .maxVertexOutputComponents                = 128,
654                 .maxTessellationGenerationLevel           = 64,
655                 .maxTessellationPatchSize                 = 32,
656                 .maxTessellationControlPerVertexInputComponents = 128,
657                 .maxTessellationControlPerVertexOutputComponents = 128,
658                 .maxTessellationControlPerPatchOutputComponents = 120,
659                 .maxTessellationControlTotalOutputComponents = 4096,
660                 .maxTessellationEvaluationInputComponents = 128,
661                 .maxTessellationEvaluationOutputComponents = 128,
662                 .maxGeometryShaderInvocations             = 127,
663                 .maxGeometryInputComponents               = 64,
664                 .maxGeometryOutputComponents              = 128,
665                 .maxGeometryOutputVertices                = 256,
666                 .maxGeometryTotalOutputComponents         = 1024,
667                 .maxFragmentInputComponents               = 128,
668                 .maxFragmentOutputAttachments             = 8,
669                 .maxFragmentDualSrcAttachments            = 1,
670                 .maxFragmentCombinedOutputResources       = 8,
671                 .maxComputeSharedMemorySize               = 32768,
672                 .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
673                 .maxComputeWorkGroupInvocations           = 2048,
674                 .maxComputeWorkGroupSize = {
675                         2048,
676                         2048,
677                         2048
678                 },
679                 .subPixelPrecisionBits                    = 4 /* FIXME */,
680                 .subTexelPrecisionBits                    = 4 /* FIXME */,
681                 .mipmapPrecisionBits                      = 4 /* FIXME */,
682                 .maxDrawIndexedIndexValue                 = UINT32_MAX,
683                 .maxDrawIndirectCount                     = UINT32_MAX,
684                 .maxSamplerLodBias                        = 16,
685                 .maxSamplerAnisotropy                     = 16,
686                 .maxViewports                             = MAX_VIEWPORTS,
687                 .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
688                 .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
689                 .viewportSubPixelBits                     = 13, /* We take a float? */
690                 .minMemoryMapAlignment                    = 4096, /* A page */
691                 .minTexelBufferOffsetAlignment            = 1,
692                 .minUniformBufferOffsetAlignment          = 4,
693                 .minStorageBufferOffsetAlignment          = 4,
694                 .minTexelOffset                           = -32,
695                 .maxTexelOffset                           = 31,
696                 .minTexelGatherOffset                     = -32,
697                 .maxTexelGatherOffset                     = 31,
698                 .minInterpolationOffset                   = -2,
699                 .maxInterpolationOffset                   = 2,
700                 .subPixelInterpolationOffsetBits          = 8,
701                 .maxFramebufferWidth                      = (1 << 14),
702                 .maxFramebufferHeight                     = (1 << 14),
703                 .maxFramebufferLayers                     = (1 << 10),
704                 .framebufferColorSampleCounts             = sample_counts,
705                 .framebufferDepthSampleCounts             = sample_counts,
706                 .framebufferStencilSampleCounts           = sample_counts,
707                 .framebufferNoAttachmentsSampleCounts     = sample_counts,
708                 .maxColorAttachments                      = MAX_RTS,
709                 .sampledImageColorSampleCounts            = sample_counts,
710                 .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
711                 .sampledImageDepthSampleCounts            = sample_counts,
712                 .sampledImageStencilSampleCounts          = sample_counts,
713                 .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
714                 .maxSampleMaskWords                       = 1,
715                 .timestampComputeAndGraphics              = true,
716                 .timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
717                 .maxClipDistances                         = 8,
718                 .maxCullDistances                         = 8,
719                 .maxCombinedClipAndCullDistances          = 8,
720                 .discreteQueuePriorities                  = 1,
721                 .pointSizeRange                           = { 0.125, 255.875 },
722                 .lineWidthRange                           = { 0.0, 7.9921875 },
723                 .pointSizeGranularity                     = (1.0 / 8.0),
724                 .lineWidthGranularity                     = (1.0 / 128.0),
725                 .strictLines                              = false, /* FINISHME */
726                 .standardSampleLocations                  = true,
727                 .optimalBufferCopyOffsetAlignment         = 128,
728                 .optimalBufferCopyRowPitchAlignment       = 128,
729                 .nonCoherentAtomSize                      = 64,
730         };
731
732         *pProperties = (VkPhysicalDeviceProperties) {
733                 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
734                 .driverVersion = radv_get_driver_version(),
735                 .vendorID = 0x1002,
736                 .deviceID = pdevice->rad_info.pci_id,
737                 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
738                 .limits = limits,
739                 .sparseProperties = {0},
740         };
741
742         strcpy(pProperties->deviceName, pdevice->name);
743         memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
744 }
745
/* vkGetPhysicalDeviceProperties2KHR: fill the core properties, then walk
 * the caller-provided pNext chain and fill every extension struct we
 * recognize.  Unknown sTypes are silently skipped, as the spec requires.
 */
void radv_GetPhysicalDeviceProperties2KHR(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties2KHR             *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHX: {
			VkPhysicalDeviceIDPropertiesKHX *properties = (VkPhysicalDeviceIDPropertiesKHX*)ext;
			/* The driver UUID is derived from the cache UUID for
			 * device index 0. */
			radv_device_get_cache_uuid(0, properties->driverUUID);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			/* No LUID is reported. */
			properties->deviceLUIDValid = false;
			break;
		}
		default:
			break;
		}
	}
}
773
774 static void radv_get_physical_device_queue_family_properties(
775         struct radv_physical_device*                pdevice,
776         uint32_t*                                   pCount,
777         VkQueueFamilyProperties**                    pQueueFamilyProperties)
778 {
779         int num_queue_families = 1;
780         int idx;
781         if (pdevice->rad_info.num_compute_rings > 0 &&
782             pdevice->rad_info.chip_class >= CIK &&
783             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
784                 num_queue_families++;
785
786         if (pQueueFamilyProperties == NULL) {
787                 *pCount = num_queue_families;
788                 return;
789         }
790
791         if (!*pCount)
792                 return;
793
794         idx = 0;
795         if (*pCount >= 1) {
796                 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
797                         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
798                                       VK_QUEUE_COMPUTE_BIT |
799                                       VK_QUEUE_TRANSFER_BIT |
800                                       VK_QUEUE_SPARSE_BINDING_BIT,
801                         .queueCount = 1,
802                         .timestampValidBits = 64,
803                         .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
804                 };
805                 idx++;
806         }
807
808         if (pdevice->rad_info.num_compute_rings > 0 &&
809             pdevice->rad_info.chip_class >= CIK &&
810             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
811                 if (*pCount > idx) {
812                         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
813                                 .queueFlags = VK_QUEUE_COMPUTE_BIT |
814                                               VK_QUEUE_TRANSFER_BIT |
815                                               VK_QUEUE_SPARSE_BINDING_BIT,
816                                 .queueCount = pdevice->rad_info.num_compute_rings,
817                                 .timestampValidBits = 64,
818                                 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
819                         };
820                         idx++;
821                 }
822         }
823         *pCount = idx;
824 }
825
826 void radv_GetPhysicalDeviceQueueFamilyProperties(
827         VkPhysicalDevice                            physicalDevice,
828         uint32_t*                                   pCount,
829         VkQueueFamilyProperties*                    pQueueFamilyProperties)
830 {
831         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
832         if (!pQueueFamilyProperties) {
833                 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
834                 return;
835         }
836         VkQueueFamilyProperties *properties[] = {
837                 pQueueFamilyProperties + 0,
838                 pQueueFamilyProperties + 1,
839                 pQueueFamilyProperties + 2,
840         };
841         radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
842         assert(*pCount <= 3);
843 }
844
845 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
846         VkPhysicalDevice                            physicalDevice,
847         uint32_t*                                   pCount,
848         VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
849 {
850         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
851         if (!pQueueFamilyProperties) {
852                 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
853                 return;
854         }
855         VkQueueFamilyProperties *properties[] = {
856                 &pQueueFamilyProperties[0].queueFamilyProperties,
857                 &pQueueFamilyProperties[1].queueFamilyProperties,
858                 &pQueueFamilyProperties[2].queueFamilyProperties,
859         };
860         radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
861         assert(*pCount <= 3);
862 }
863
/* vkGetPhysicalDeviceMemoryProperties.
 *
 * Advertises a fixed set of memory types (VRAM, write-combined GTT,
 * CPU-visible VRAM, cached GTT) over three heaps (CPU-invisible VRAM,
 * CPU-visible VRAM, GTT).  Heap sizes come from the winsys GPU info.
 */
void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);

	pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
	/* Device-local VRAM, not host-visible. */
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		.heapIndex = RADV_MEM_HEAP_VRAM,
	};
	/* System memory (GTT): host-visible and coherent. */
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		.heapIndex = RADV_MEM_HEAP_GTT,
	};
	/* CPU-mappable VRAM: both device-local and host-visible. */
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
		VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		.heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
	};
	/* Cached GTT: adds HOST_CACHED for efficient CPU read-back. */
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
		VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
		.heapIndex = RADV_MEM_HEAP_GTT,
	};

	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);

	pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
	/* The plain VRAM heap is the portion the CPU cannot map; the
	 * CPU-visible window is reported as its own heap below, so the
	 * two sizes sum to total VRAM. */
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
		.size = physical_device->rad_info.vram_size -
				physical_device->rad_info.vram_vis_size,
		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
	};
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
		.size = physical_device->rad_info.vram_vis_size,
		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
	};
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
		.size = physical_device->rad_info.gart_size,
		.flags = 0,
	};
}
912
913 void radv_GetPhysicalDeviceMemoryProperties2KHR(
914         VkPhysicalDevice                            physicalDevice,
915         VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
916 {
917         return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
918                                                       &pMemoryProperties->memoryProperties);
919 }
920
921 static int
922 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
923                 int queue_family_index, int idx)
924 {
925         queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
926         queue->device = device;
927         queue->queue_family_index = queue_family_index;
928         queue->queue_idx = idx;
929
930         queue->hw_ctx = device->ws->ctx_create(device->ws);
931         if (!queue->hw_ctx)
932                 return VK_ERROR_OUT_OF_HOST_MEMORY;
933
934         return VK_SUCCESS;
935 }
936
/* Tear down a queue: destroy its hardware context and release every
 * ring, scratch buffer and preamble command stream that may have been
 * lazily allocated for it.  Every field is NULL-checked, so this is
 * safe to call on a partially initialized queue (as the radv_CreateDevice
 * error path does).
 */
static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->tess_factor_ring_bo)
		queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
	if (queue->tess_offchip_ring_bo)
		queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}
962
/* Set device->gs_table_depth from the GPU family: the listed APUs and
 * small dGPUs get 16, the larger parts get 32.
 * NOTE(review): the 16-vs-32 split presumably tracks the part's size
 * (CU/SE count) — confirm against radeonsi's equivalent table.
 * Unknown families are a hard programming error (unreachable).
 */
static void
radv_device_init_gs_info(struct radv_device *device)
{
	switch (device->physical_device->rad_info.family) {
	case CHIP_OLAND:
	case CHIP_HAINAN:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_ICELAND:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		device->gs_table_depth = 16;
		return;
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		device->gs_table_depth = 32;
		return;
	default:
		unreachable("unknown GPU");
	}
}
995
996 VkResult radv_CreateDevice(
997         VkPhysicalDevice                            physicalDevice,
998         const VkDeviceCreateInfo*                   pCreateInfo,
999         const VkAllocationCallbacks*                pAllocator,
1000         VkDevice*                                   pDevice)
1001 {
1002         RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1003         VkResult result;
1004         struct radv_device *device;
1005
1006         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1007                 if (!is_extension_enabled(physical_device->extensions.ext_array,
1008                                         physical_device->extensions.num_ext,
1009                                         pCreateInfo->ppEnabledExtensionNames[i]))
1010                         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1011         }
1012
1013         device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
1014                              sizeof(*device), 8,
1015                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1016         if (!device)
1017                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1018
1019         memset(device, 0, sizeof(*device));
1020
1021         device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1022         device->instance = physical_device->instance;
1023         device->physical_device = physical_device;
1024
1025         device->debug_flags = device->instance->debug_flags;
1026
1027         device->ws = physical_device->ws;
1028         if (pAllocator)
1029                 device->alloc = *pAllocator;
1030         else
1031                 device->alloc = physical_device->instance->alloc;
1032
1033         for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1034                 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1035                 uint32_t qfi = queue_create->queueFamilyIndex;
1036
1037                 device->queues[qfi] = vk_alloc(&device->alloc,
1038                                                queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1039                 if (!device->queues[qfi]) {
1040                         result = VK_ERROR_OUT_OF_HOST_MEMORY;
1041                         goto fail;
1042                 }
1043
1044                 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1045
1046                 device->queue_count[qfi] = queue_create->queueCount;
1047
1048                 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1049                         result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
1050                         if (result != VK_SUCCESS)
1051                                 goto fail;
1052                 }
1053         }
1054
1055 #if HAVE_LLVM < 0x0400
1056         device->llvm_supports_spill = false;
1057 #else
1058         device->llvm_supports_spill = true;
1059 #endif
1060
1061         /* The maximum number of scratch waves. Scratch space isn't divided
1062          * evenly between CUs. The number is only a function of the number of CUs.
1063          * We can decrease the constant to decrease the scratch buffer size.
1064          *
1065          * sctx->scratch_waves must be >= the maximum posible size of
1066          * 1 threadgroup, so that the hw doesn't hang from being unable
1067          * to start any.
1068          *
1069          * The recommended value is 4 per CU at most. Higher numbers don't
1070          * bring much benefit, but they still occupy chip resources (think
1071          * async compute). I've seen ~2% performance difference between 4 and 32.
1072          */
1073         uint32_t max_threads_per_block = 2048;
1074         device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1075                                      max_threads_per_block / 64);
1076
1077         radv_device_init_gs_info(device);
1078
1079         device->tess_offchip_block_dw_size =
1080                 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1081         device->has_distributed_tess =
1082                 device->physical_device->rad_info.chip_class >= VI &&
1083                 device->physical_device->rad_info.max_se >= 2;
1084
1085         result = radv_device_init_meta(device);
1086         if (result != VK_SUCCESS)
1087                 goto fail;
1088
1089         radv_device_init_msaa(device);
1090
1091         for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1092                 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1093                 switch (family) {
1094                 case RADV_QUEUE_GENERAL:
1095                         radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1096                         radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1097                         radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1098                         break;
1099                 case RADV_QUEUE_COMPUTE:
1100                         radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1101                         radeon_emit(device->empty_cs[family], 0);
1102                         break;
1103                 }
1104                 device->ws->cs_finalize(device->empty_cs[family]);
1105
1106                 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
1107                 switch (family) {
1108                 case RADV_QUEUE_GENERAL:
1109                 case RADV_QUEUE_COMPUTE:
1110                         si_cs_emit_cache_flush(device->flush_cs[family],
1111                                                device->physical_device->rad_info.chip_class,
1112                                                NULL, 0,
1113                                                family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1114                                                RADV_CMD_FLAG_INV_ICACHE |
1115                                                RADV_CMD_FLAG_INV_SMEM_L1 |
1116                                                RADV_CMD_FLAG_INV_VMEM_L1 |
1117                                                RADV_CMD_FLAG_INV_GLOBAL_L2);
1118                         break;
1119                 }
1120                 device->ws->cs_finalize(device->flush_cs[family]);
1121
1122                 device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
1123                 switch (family) {
1124                 case RADV_QUEUE_GENERAL:
1125                 case RADV_QUEUE_COMPUTE:
1126                         si_cs_emit_cache_flush(device->flush_shader_cs[family],
1127                                                device->physical_device->rad_info.chip_class,
1128                                                NULL, 0,
1129                                                family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1130                                                family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
1131                                                RADV_CMD_FLAG_INV_ICACHE |
1132                                                RADV_CMD_FLAG_INV_SMEM_L1 |
1133                                                RADV_CMD_FLAG_INV_VMEM_L1 |
1134                                                RADV_CMD_FLAG_INV_GLOBAL_L2);
1135                         break;
1136                 }
1137                 device->ws->cs_finalize(device->flush_shader_cs[family]);
1138         }
1139
1140         if (getenv("RADV_TRACE_FILE")) {
1141                 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
1142                                                              RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
1143                 if (!device->trace_bo)
1144                         goto fail;
1145
1146                 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
1147                 if (!device->trace_id_ptr)
1148                         goto fail;
1149         }
1150
1151         if (device->physical_device->rad_info.chip_class >= CIK)
1152                 cik_create_gfx_config(device);
1153
1154         VkPipelineCacheCreateInfo ci;
1155         ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1156         ci.pNext = NULL;
1157         ci.flags = 0;
1158         ci.pInitialData = NULL;
1159         ci.initialDataSize = 0;
1160         VkPipelineCache pc;
1161         result = radv_CreatePipelineCache(radv_device_to_handle(device),
1162                                           &ci, NULL, &pc);
1163         if (result != VK_SUCCESS)
1164                 goto fail;
1165
1166         device->mem_cache = radv_pipeline_cache_from_handle(pc);
1167
1168         *pDevice = radv_device_to_handle(device);
1169         return VK_SUCCESS;
1170
1171 fail:
1172         if (device->trace_bo)
1173                 device->ws->buffer_destroy(device->trace_bo);
1174
1175         if (device->gfx_init)
1176                 device->ws->buffer_destroy(device->gfx_init);
1177
1178         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1179                 for (unsigned q = 0; q < device->queue_count[i]; q++)
1180                         radv_queue_finish(&device->queues[i][q]);
1181                 if (device->queue_count[i])
1182                         vk_free(&device->alloc, device->queues[i]);
1183         }
1184
1185         vk_free(&device->alloc, device);
1186         return result;
1187 }
1188
/* vkDestroyDevice: release everything radv_CreateDevice allocated —
 * trace buffer, gfx-init buffer, all queues and their per-family
 * command streams, meta state, and the in-memory pipeline cache.
 * A NULL device handle is a no-op, per the Vulkan spec.
 */
void radv_DestroyDevice(
	VkDevice                                    _device,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (!device)
		return;

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
		if (device->empty_cs[i])
			device->ws->cs_destroy(device->empty_cs[i]);
		if (device->flush_cs[i])
			device->ws->cs_destroy(device->flush_cs[i]);
		if (device->flush_shader_cs[i])
			device->ws->cs_destroy(device->flush_shader_cs[i]);
	}
	radv_device_finish_meta(device);

	VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

	vk_free(&device->alloc, device);
}
1223
1224 VkResult radv_EnumerateInstanceExtensionProperties(
1225         const char*                                 pLayerName,
1226         uint32_t*                                   pPropertyCount,
1227         VkExtensionProperties*                      pProperties)
1228 {
1229         if (pProperties == NULL) {
1230                 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1231                 return VK_SUCCESS;
1232         }
1233
1234         *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1235         typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1236
1237         if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1238                 return VK_INCOMPLETE;
1239
1240         return VK_SUCCESS;
1241 }
1242
1243 VkResult radv_EnumerateDeviceExtensionProperties(
1244         VkPhysicalDevice                            physicalDevice,
1245         const char*                                 pLayerName,
1246         uint32_t*                                   pPropertyCount,
1247         VkExtensionProperties*                      pProperties)
1248 {
1249         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1250
1251         if (pProperties == NULL) {
1252                 *pPropertyCount = pdevice->extensions.num_ext;
1253                 return VK_SUCCESS;
1254         }
1255
1256         *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1257         typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1258
1259         if (*pPropertyCount < pdevice->extensions.num_ext)
1260                 return VK_INCOMPLETE;
1261
1262         return VK_SUCCESS;
1263 }
1264
1265 VkResult radv_EnumerateInstanceLayerProperties(
1266         uint32_t*                                   pPropertyCount,
1267         VkLayerProperties*                          pProperties)
1268 {
1269         if (pProperties == NULL) {
1270                 *pPropertyCount = 0;
1271                 return VK_SUCCESS;
1272         }
1273
1274         /* None supported at this time */
1275         return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1276 }
1277
1278 VkResult radv_EnumerateDeviceLayerProperties(
1279         VkPhysicalDevice                            physicalDevice,
1280         uint32_t*                                   pPropertyCount,
1281         VkLayerProperties*                          pProperties)
1282 {
1283         if (pProperties == NULL) {
1284                 *pPropertyCount = 0;
1285                 return VK_SUCCESS;
1286         }
1287
1288         /* None supported at this time */
1289         return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1290 }
1291
/* vkGetDeviceQueue: hand back a handle to a queue created at device
 * creation time.  No range checking — per the spec, the indices must
 * match what was requested in VkDeviceCreateInfo.
 */
void radv_GetDeviceQueue(
	VkDevice                                    _device,
	uint32_t                                    queueFamilyIndex,
	uint32_t                                    queueIndex,
	VkQueue*                                    pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}
1302
1303 static void radv_dump_trace(struct radv_device *device,
1304                             struct radeon_winsys_cs *cs)
1305 {
1306         const char *filename = getenv("RADV_TRACE_FILE");
1307         FILE *f = fopen(filename, "w");
1308         if (!f) {
1309                 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1310                 return;
1311         }
1312
1313         fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1314         device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1315         fclose(f);
1316 }
1317
1318 static void
1319 fill_geom_tess_rings(struct radv_queue *queue,
1320                      uint32_t *map,
1321                      bool add_sample_positions,
1322                      uint32_t esgs_ring_size,
1323                      struct radeon_winsys_bo *esgs_ring_bo,
1324                      uint32_t gsvs_ring_size,
1325                      struct radeon_winsys_bo *gsvs_ring_bo,
1326                      uint32_t tess_factor_ring_size,
1327                      struct radeon_winsys_bo *tess_factor_ring_bo,
1328                      uint32_t tess_offchip_ring_size,
1329                      struct radeon_winsys_bo *tess_offchip_ring_bo)
1330 {
1331         uint64_t esgs_va = 0, gsvs_va = 0;
1332         uint64_t tess_factor_va = 0, tess_offchip_va = 0;
1333         uint32_t *desc = &map[4];
1334
1335         if (esgs_ring_bo)
1336                 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1337         if (gsvs_ring_bo)
1338                 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1339         if (tess_factor_ring_bo)
1340                 tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1341         if (tess_offchip_ring_bo)
1342                 tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
1343
1344         /* stride 0, num records - size, add tid, swizzle, elsize4,
1345            index stride 64 */
1346         desc[0] = esgs_va;
1347         desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1348                 S_008F04_STRIDE(0) |
1349                 S_008F04_SWIZZLE_ENABLE(true);
1350         desc[2] = esgs_ring_size;
1351         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1352                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1353                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1354                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1355                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1356                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1357                 S_008F0C_ELEMENT_SIZE(1) |
1358                 S_008F0C_INDEX_STRIDE(3) |
1359                 S_008F0C_ADD_TID_ENABLE(true);
1360
1361         desc += 4;
1362         /* GS entry for ES->GS ring */
1363         /* stride 0, num records - size, elsize0,
1364            index stride 0 */
1365         desc[0] = esgs_va;
1366         desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1367                 S_008F04_STRIDE(0) |
1368                 S_008F04_SWIZZLE_ENABLE(false);
1369         desc[2] = esgs_ring_size;
1370         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1371                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1372                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1373                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1374                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1375                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1376                 S_008F0C_ELEMENT_SIZE(0) |
1377                 S_008F0C_INDEX_STRIDE(0) |
1378                 S_008F0C_ADD_TID_ENABLE(false);
1379
1380         desc += 4;
1381         /* VS entry for GS->VS ring */
1382         /* stride 0, num records - size, elsize0,
1383            index stride 0 */
1384         desc[0] = gsvs_va;
1385         desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1386                 S_008F04_STRIDE(0) |
1387                 S_008F04_SWIZZLE_ENABLE(false);
1388         desc[2] = gsvs_ring_size;
1389         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1390                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1391                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1392                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1393                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1394                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1395                 S_008F0C_ELEMENT_SIZE(0) |
1396                 S_008F0C_INDEX_STRIDE(0) |
1397                 S_008F0C_ADD_TID_ENABLE(false);
1398         desc += 4;
1399
1400         /* stride gsvs_itemsize, num records 64
1401            elsize 4, index stride 16 */
1402         /* shader will patch stride and desc[2] */
1403         desc[0] = gsvs_va;
1404         desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1405                 S_008F04_STRIDE(0) |
1406                 S_008F04_SWIZZLE_ENABLE(true);
1407         desc[2] = 0;
1408         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1409                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1410                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1411                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1412                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1413                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1414                 S_008F0C_ELEMENT_SIZE(1) |
1415                 S_008F0C_INDEX_STRIDE(1) |
1416                 S_008F0C_ADD_TID_ENABLE(true);
1417         desc += 4;
1418
1419         desc[0] = tess_factor_va;
1420         desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
1421                 S_008F04_STRIDE(0) |
1422                 S_008F04_SWIZZLE_ENABLE(false);
1423         desc[2] = tess_factor_ring_size;
1424         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1425                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1426                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1427                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1428                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1429                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1430                 S_008F0C_ELEMENT_SIZE(0) |
1431                 S_008F0C_INDEX_STRIDE(0) |
1432                 S_008F0C_ADD_TID_ENABLE(false);
1433         desc += 4;
1434
1435         desc[0] = tess_offchip_va;
1436         desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1437                 S_008F04_STRIDE(0) |
1438                 S_008F04_SWIZZLE_ENABLE(false);
1439         desc[2] = tess_offchip_ring_size;
1440         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1441                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1442                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1443                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1444                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1445                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1446                 S_008F0C_ELEMENT_SIZE(0) |
1447                 S_008F0C_INDEX_STRIDE(0) |
1448                 S_008F0C_ADD_TID_ENABLE(false);
1449         desc += 4;
1450
1451         /* add sample positions after all rings */
1452         memcpy(desc, queue->device->sample_locations_1x, 8);
1453         desc += 2;
1454         memcpy(desc, queue->device->sample_locations_2x, 16);
1455         desc += 4;
1456         memcpy(desc, queue->device->sample_locations_4x, 32);
1457         desc += 8;
1458         memcpy(desc, queue->device->sample_locations_8x, 64);
1459         desc += 16;
1460         memcpy(desc, queue->device->sample_locations_16x, 128);
1461 }
1462
1463 static unsigned
1464 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1465 {
1466         bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1467                 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1468                 device->physical_device->rad_info.family != CHIP_STONEY;
1469         unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1470         unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1471                 device->physical_device->rad_info.max_se;
1472         unsigned offchip_granularity;
1473         unsigned hs_offchip_param;
1474         switch (device->tess_offchip_block_dw_size) {
1475         default:
1476                 assert(0);
1477                 /* fall through */
1478         case 8192:
1479                 offchip_granularity = V_03093C_X_8K_DWORDS;
1480                 break;
1481         case 4096:
1482                 offchip_granularity = V_03093C_X_4K_DWORDS;
1483                 break;
1484         }
1485
1486         switch (device->physical_device->rad_info.chip_class) {
1487         case SI:
1488                 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1489                 break;
1490         case CIK:
1491         case VI:
1492         case GFX9:
1493         default:
1494                 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1495                 break;
1496         }
1497
1498         *max_offchip_buffers_p = max_offchip_buffers;
1499         if (device->physical_device->rad_info.chip_class >= CIK) {
1500                 if (device->physical_device->rad_info.chip_class >= VI)
1501                         --max_offchip_buffers;
1502                 hs_offchip_param =
1503                         S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1504                         S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1505         } else {
1506                 hs_offchip_param =
1507                         S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1508         }
1509         return hs_offchip_param;
1510 }
1511
/* Build (or reuse) the per-queue preamble command streams.
 *
 * The preamble CS runs before application command buffers and sets up
 * queue-global state: graphics/compute scratch, the ES->GS and GS->VS
 * rings, the tessellation factor and off-chip rings, and a descriptor
 * buffer holding ring descriptors (filled by fill_geom_tess_rings) plus,
 * optionally, the device sample-position tables.
 *
 * Two command streams are produced: dest_cs[0] (the "initial" preamble,
 * which additionally flushes caches) and dest_cs[1] (the "continue"
 * preamble, used for chained submissions). Buffers already owned by the
 * queue are reused when they are large enough; newly created buffers
 * replace (and destroy) the old ones only after both streams have been
 * finalized successfully, so the queue is never left half-updated.
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_DEVICE_MEMORY after releasing
 * everything allocated here (queue-owned objects are left untouched).
 */
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
                     uint32_t scratch_size,
                     uint32_t compute_scratch_size,
                     uint32_t esgs_ring_size,
                     uint32_t gsvs_ring_size,
                     bool needs_tess_rings,
                     bool needs_sample_positions,
                     struct radeon_winsys_cs **initial_preamble_cs,
                     struct radeon_winsys_cs **continue_preamble_cs)
{
	struct radeon_winsys_bo *scratch_bo = NULL;
	struct radeon_winsys_bo *descriptor_bo = NULL;
	struct radeon_winsys_bo *compute_scratch_bo = NULL;
	struct radeon_winsys_bo *esgs_ring_bo = NULL;
	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
	struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
	struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
	struct radeon_winsys_cs *dest_cs[2] = {0};
	bool add_tess_rings = false, add_sample_positions = false;
	unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
	unsigned max_offchip_buffers;
	unsigned hs_offchip_param = 0;
	/* Tess rings and sample positions are fixed-size and created at most
	 * once per queue; only add them if not already present. */
	if (!queue->has_tess_rings) {
		if (needs_tess_rings)
			add_tess_rings = true;
	}
	if (!queue->has_sample_positions) {
		if (needs_sample_positions)
			add_sample_positions = true;
	}
	/* 32K of tess-factor ring per shader engine. */
	tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
	hs_offchip_param = radv_get_hs_offchip_param(queue->device,
						     &max_offchip_buffers);
	tess_offchip_ring_size = max_offchip_buffers *
		queue->device->tess_offchip_block_dw_size * 4;

	/* Fast path: everything the submission needs is already covered by
	 * the queue's current buffers — reuse the existing preambles. */
	if (scratch_size <= queue->scratch_size &&
	    compute_scratch_size <= queue->compute_scratch_size &&
	    esgs_ring_size <= queue->esgs_ring_size &&
	    gsvs_ring_size <= queue->gsvs_ring_size &&
	    !add_tess_rings && !add_sample_positions &&
	    queue->initial_preamble_cs) {
		*initial_preamble_cs = queue->initial_preamble_cs;
		*continue_preamble_cs = queue->continue_preamble_cs;
		/* Nothing queue-global to re-establish mid-chain, so no
		 * continue preamble is needed. */
		if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
			*continue_preamble_cs = NULL;
		return VK_SUCCESS;
	}

	/* Grow (allocate new) or reuse each buffer. On reuse of the rings,
	 * also pick up the queue's (larger) size so the descriptors and
	 * register writes below describe the actual buffer. */
	if (scratch_size > queue->scratch_size) {
		scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                              scratch_size,
		                                              4096,
		                                              RADEON_DOMAIN_VRAM,
		                                              RADEON_FLAG_NO_CPU_ACCESS);
		if (!scratch_bo)
			goto fail;
	} else
		scratch_bo = queue->scratch_bo;

	if (compute_scratch_size > queue->compute_scratch_size) {
		compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                                      compute_scratch_size,
		                                                      4096,
		                                                      RADEON_DOMAIN_VRAM,
		                                                      RADEON_FLAG_NO_CPU_ACCESS);
		if (!compute_scratch_bo)
			goto fail;

	} else
		compute_scratch_bo = queue->compute_scratch_bo;

	if (esgs_ring_size > queue->esgs_ring_size) {
		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                                esgs_ring_size,
		                                                4096,
		                                                RADEON_DOMAIN_VRAM,
		                                                RADEON_FLAG_NO_CPU_ACCESS);
		if (!esgs_ring_bo)
			goto fail;
	} else {
		esgs_ring_bo = queue->esgs_ring_bo;
		esgs_ring_size = queue->esgs_ring_size;
	}

	if (gsvs_ring_size > queue->gsvs_ring_size) {
		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                                gsvs_ring_size,
		                                                4096,
		                                                RADEON_DOMAIN_VRAM,
		                                                RADEON_FLAG_NO_CPU_ACCESS);
		if (!gsvs_ring_bo)
			goto fail;
	} else {
		gsvs_ring_bo = queue->gsvs_ring_bo;
		gsvs_ring_size = queue->gsvs_ring_size;
	}

	if (add_tess_rings) {
		tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                                       tess_factor_ring_size,
		                                                       256,
		                                                       RADEON_DOMAIN_VRAM,
		                                                       RADEON_FLAG_NO_CPU_ACCESS);
		if (!tess_factor_ring_bo)
			goto fail;
		tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                                       tess_offchip_ring_size,
		                                                       256,
		                                                       RADEON_DOMAIN_VRAM,
		                                                       RADEON_FLAG_NO_CPU_ACCESS);
		if (!tess_offchip_ring_bo)
			goto fail;
	} else {
		tess_factor_ring_bo = queue->tess_factor_ring_bo;
		tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
	}

	/* The descriptor buffer must be rebuilt whenever any buffer it
	 * describes changed (compute scratch is programmed via registers,
	 * not descriptors, so it is deliberately absent from this check). */
	if (scratch_bo != queue->scratch_bo ||
	    esgs_ring_bo != queue->esgs_ring_bo ||
	    gsvs_ring_bo != queue->gsvs_ring_bo ||
	    tess_factor_ring_bo != queue->tess_factor_ring_bo ||
	    tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
		uint32_t size = 0;
		if (gsvs_ring_bo || esgs_ring_bo ||
		    tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
			size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
			if (add_sample_positions)
				size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
		}
		else if (scratch_bo)
			size = 8; /* 2 dword */

		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                                 size,
		                                                 4096,
		                                                 RADEON_DOMAIN_VRAM,
		                                                 RADEON_FLAG_CPU_ACCESS);
		if (!descriptor_bo)
			goto fail;
	} else
		descriptor_bo = queue->descriptor_bo;

	/* Emit both preambles: i == 0 is the initial preamble (includes a
	 * full cache flush), i == 1 is the continue preamble. */
	for(int i = 0; i < 2; ++i) {
		struct radeon_winsys_cs *cs = NULL;
		cs = queue->device->ws->cs_create(queue->device->ws,
						  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
		if (!cs)
			goto fail;

		dest_cs[i] = cs;

		/* Reference every buffer the preamble touches so the kernel
		 * keeps them resident for this submission. */
		if (scratch_bo)
			queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);

		if (esgs_ring_bo)
			queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);

		if (gsvs_ring_bo)
			queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);

		if (tess_factor_ring_bo)
			queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);

		if (tess_offchip_ring_bo)
			queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);

		if (descriptor_bo)
			queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);

		/* A freshly created descriptor buffer needs its contents
		 * written via a CPU mapping (only once; the map/fill is
		 * identical on both loop iterations). */
		if (descriptor_bo != queue->descriptor_bo) {
			uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);

			if (scratch_bo) {
				/* First two dwords: graphics scratch rsrc words. */
				uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
				uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
				                 S_008F04_SWIZZLE_ENABLE(1);
				map[0] = scratch_va;
				map[1] = rsrc1;
			}

			if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
			    add_sample_positions)
				fill_geom_tess_rings(queue, map, add_sample_positions,
						     esgs_ring_size, esgs_ring_bo,
						     gsvs_ring_size, gsvs_ring_bo,
						     tess_factor_ring_size, tess_factor_ring_bo,
						     tess_offchip_ring_size, tess_offchip_ring_bo);

			queue->device->ws->buffer_unmap(descriptor_bo);
		}

		/* Drain the VS and flush the VGT before reprogramming ring
		 * state below. */
		if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
		}

		/* Program GS ring sizes (in units of 256 bytes). The register
		 * block moved on CIK+, hence the uconfig vs. config split. */
		if (esgs_ring_bo || gsvs_ring_bo) {
			if (queue->device->physical_device->rad_info.chip_class >= CIK) {
				radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
				radeon_emit(cs, esgs_ring_size >> 8);
				radeon_emit(cs, gsvs_ring_size >> 8);
			} else {
				radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
				radeon_emit(cs, esgs_ring_size >> 8);
				radeon_emit(cs, gsvs_ring_size >> 8);
			}
		}

		/* Program the tess-factor ring base/size and off-chip params.
		 * GFX9 adds a high-bits register for the 48-bit base address. */
		if (tess_factor_ring_bo) {
			uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
			if (queue->device->physical_device->rad_info.chip_class >= CIK) {
				radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
						       S_030938_SIZE(tess_factor_ring_size / 4));
				radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
						       tf_va >> 8);
				if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
					radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
							       tf_va >> 40);
				}
				radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
			} else {
				radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
						      S_008988_SIZE(tess_factor_ring_size / 4));
				radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
						      tf_va >> 8);
				radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
						      hs_offchip_param);
			}
		}

		/* Publish the descriptor buffer address in user-data SGPRs 0-1
		 * of every graphics shader stage. */
		if (descriptor_bo) {
			uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
					   R_00B130_SPI_SHADER_USER_DATA_VS_0,
					   R_00B230_SPI_SHADER_USER_DATA_GS_0,
					   R_00B330_SPI_SHADER_USER_DATA_ES_0,
					   R_00B430_SPI_SHADER_USER_DATA_HS_0,
					   R_00B530_SPI_SHADER_USER_DATA_LS_0};

			uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);

			for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
				radeon_set_sh_reg_seq(cs, regs[i], 2);
				radeon_emit(cs, va);
				radeon_emit(cs, va >> 32);
			}
		}

		/* Compute scratch is passed directly via compute user data,
		 * not through the descriptor buffer. */
		if (compute_scratch_bo) {
			uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
			                 S_008F04_SWIZZLE_ENABLE(1);

			queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);

			radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
			radeon_emit(cs, scratch_va);
			radeon_emit(cs, rsrc1);
		}

		/* Only the initial preamble flushes caches; the continue
		 * preamble runs between chained command buffers where the
		 * caches are already coherent. */
		if (!i) {
			si_cs_emit_cache_flush(cs,
			                       queue->device->physical_device->rad_info.chip_class,
					       NULL, 0,
			                       queue->queue_family_index == RING_COMPUTE &&
			                         queue->device->physical_device->rad_info.chip_class >= CIK,
			                       RADV_CMD_FLAG_INV_ICACHE |
			                       RADV_CMD_FLAG_INV_SMEM_L1 |
			                       RADV_CMD_FLAG_INV_VMEM_L1 |
			                       RADV_CMD_FLAG_INV_GLOBAL_L2);
		}

		if (!queue->device->ws->cs_finalize(cs))
			goto fail;
	}

	/* Both streams built successfully: retire the old preambles and
	 * transfer ownership of any replaced buffers to the queue. */
	if (queue->initial_preamble_cs)
			queue->device->ws->cs_destroy(queue->initial_preamble_cs);

	if (queue->continue_preamble_cs)
			queue->device->ws->cs_destroy(queue->continue_preamble_cs);

	queue->initial_preamble_cs = dest_cs[0];
	queue->continue_preamble_cs = dest_cs[1];

	if (scratch_bo != queue->scratch_bo) {
		if (queue->scratch_bo)
			queue->device->ws->buffer_destroy(queue->scratch_bo);
		queue->scratch_bo = scratch_bo;
		queue->scratch_size = scratch_size;
	}

	if (compute_scratch_bo != queue->compute_scratch_bo) {
		if (queue->compute_scratch_bo)
			queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
		queue->compute_scratch_bo = compute_scratch_bo;
		queue->compute_scratch_size = compute_scratch_size;
	}

	if (esgs_ring_bo != queue->esgs_ring_bo) {
		if (queue->esgs_ring_bo)
			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
		queue->esgs_ring_bo = esgs_ring_bo;
		queue->esgs_ring_size = esgs_ring_size;
	}

	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
		if (queue->gsvs_ring_bo)
			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
		queue->gsvs_ring_bo = gsvs_ring_bo;
		queue->gsvs_ring_size = gsvs_ring_size;
	}

	/* Tess rings are never resized, so there is no old buffer to free. */
	if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
		queue->tess_factor_ring_bo = tess_factor_ring_bo;
	}

	if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
		queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
		queue->has_tess_rings = true;
	}

	if (descriptor_bo != queue->descriptor_bo) {
		if (queue->descriptor_bo)
			queue->device->ws->buffer_destroy(queue->descriptor_bo);

		queue->descriptor_bo = descriptor_bo;
	}

	if (add_sample_positions)
		queue->has_sample_positions = true;

	*initial_preamble_cs = queue->initial_preamble_cs;
	*continue_preamble_cs = queue->continue_preamble_cs;
	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
			*continue_preamble_cs = NULL;
	return VK_SUCCESS;
fail:
	/* Release only what this call created; buffers still owned by the
	 * queue (the reuse paths above) must survive the failure. */
	for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
		if (dest_cs[i])
			queue->device->ws->cs_destroy(dest_cs[i]);
	if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
		queue->device->ws->buffer_destroy(descriptor_bo);
	if (scratch_bo && scratch_bo != queue->scratch_bo)
		queue->device->ws->buffer_destroy(scratch_bo);
	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(compute_scratch_bo);
	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(esgs_ring_bo);
	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(gsvs_ring_bo);
	if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
		queue->device->ws->buffer_destroy(tess_factor_ring_bo);
	if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
		queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
	return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
1872
1873 VkResult radv_QueueSubmit(
1874         VkQueue                                     _queue,
1875         uint32_t                                    submitCount,
1876         const VkSubmitInfo*                         pSubmits,
1877         VkFence                                     _fence)
1878 {
1879         RADV_FROM_HANDLE(radv_queue, queue, _queue);
1880         RADV_FROM_HANDLE(radv_fence, fence, _fence);
1881         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1882         struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1883         int ret;
1884         uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1885         uint32_t scratch_size = 0;
1886         uint32_t compute_scratch_size = 0;
1887         uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1888         struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1889         VkResult result;
1890         bool fence_emitted = false;
1891         bool tess_rings_needed = false;
1892         bool sample_positions_needed = false;
1893
1894         /* Do this first so failing to allocate scratch buffers can't result in
1895          * partially executed submissions. */
1896         for (uint32_t i = 0; i < submitCount; i++) {
1897                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1898                         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1899                                          pSubmits[i].pCommandBuffers[j]);
1900
1901                         scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1902                         compute_scratch_size = MAX2(compute_scratch_size,
1903                                                     cmd_buffer->compute_scratch_size_needed);
1904                         esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1905                         gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1906                         tess_rings_needed |= cmd_buffer->tess_rings_needed;
1907                         sample_positions_needed |= cmd_buffer->sample_positions_needed;
1908                 }
1909         }
1910
1911         result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1912                                       esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1913                                       sample_positions_needed,
1914                                       &initial_preamble_cs, &continue_preamble_cs);
1915         if (result != VK_SUCCESS)
1916                 return result;
1917
1918         for (uint32_t i = 0; i < submitCount; i++) {
1919                 struct radeon_winsys_cs **cs_array;
1920                 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
1921                 bool can_patch = !do_flush;
1922                 uint32_t advance;
1923
1924                 if (!pSubmits[i].commandBufferCount) {
1925                         if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1926                                 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1927                                                                    &queue->device->empty_cs[queue->queue_family_index],
1928                                                                    1, NULL, NULL,
1929                                                                    (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1930                                                                    pSubmits[i].waitSemaphoreCount,
1931                                                                    (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1932                                                                    pSubmits[i].signalSemaphoreCount,
1933                                                                    false, base_fence);
1934                                 if (ret) {
1935                                         radv_loge("failed to submit CS %d\n", i);
1936                                         abort();
1937                                 }
1938                                 fence_emitted = true;
1939                         }
1940                         continue;
1941                 }
1942
1943                 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1944                                                 (pSubmits[i].commandBufferCount + do_flush));
1945
1946                 if(do_flush)
1947                         cs_array[0] = pSubmits[i].waitSemaphoreCount ?
1948                                 queue->device->flush_shader_cs[queue->queue_family_index] :
1949                                 queue->device->flush_cs[queue->queue_family_index];
1950
1951                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1952                         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1953                                          pSubmits[i].pCommandBuffers[j]);
1954                         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1955
1956                         cs_array[j + do_flush] = cmd_buffer->cs;
1957                         if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1958                                 can_patch = false;
1959                 }
1960
1961                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
1962                         advance = MIN2(max_cs_submission,
1963                                        pSubmits[i].commandBufferCount + do_flush - j);
1964                         bool b = j == 0;
1965                         bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;
1966
1967                         if (queue->device->trace_bo)
1968                                 *queue->device->trace_id_ptr = 0;
1969
1970                         ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1971                                                         advance, initial_preamble_cs, continue_preamble_cs,
1972                                                         (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1973                                                         b ? pSubmits[i].waitSemaphoreCount : 0,
1974                                                         (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1975                                                         e ? pSubmits[i].signalSemaphoreCount : 0,
1976                                                         can_patch, base_fence);
1977
1978                         if (ret) {
1979                                 radv_loge("failed to submit CS %d\n", i);
1980                                 abort();
1981                         }
1982                         fence_emitted = true;
1983                         if (queue->device->trace_bo) {
1984                                 bool success = queue->device->ws->ctx_wait_idle(
1985                                                         queue->hw_ctx,
1986                                                         radv_queue_family_to_ring(
1987                                                                 queue->queue_family_index),
1988                                                         queue->queue_idx);
1989
1990                                 if (!success) { /* Hang */
1991                                         radv_dump_trace(queue->device, cs_array[j]);
1992                                         abort();
1993                                 }
1994                         }
1995                 }
1996                 free(cs_array);
1997         }
1998
1999         if (fence) {
2000                 if (!fence_emitted)
2001                         ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2002                                                            &queue->device->empty_cs[queue->queue_family_index],
2003                                                            1, NULL, NULL, NULL, 0, NULL, 0,
2004                                                            false, base_fence);
2005
2006                 fence->submitted = true;
2007         }
2008
2009         return VK_SUCCESS;
2010 }
2011
2012 VkResult radv_QueueWaitIdle(
2013         VkQueue                                     _queue)
2014 {
2015         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2016
2017         queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2018                                          radv_queue_family_to_ring(queue->queue_family_index),
2019                                          queue->queue_idx);
2020         return VK_SUCCESS;
2021 }
2022
2023 VkResult radv_DeviceWaitIdle(
2024         VkDevice                                    _device)
2025 {
2026         RADV_FROM_HANDLE(radv_device, device, _device);
2027
2028         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2029                 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2030                         radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2031                 }
2032         }
2033         return VK_SUCCESS;
2034 }
2035
/* vkGetInstanceProcAddr: entrypoints are resolved purely by name from a
 * static table, so the instance handle is not consulted. */
PFN_vkVoidFunction radv_GetInstanceProcAddr(
        VkInstance                                  instance,
        const char*                                 pName)
{
        return radv_lookup_entrypoint(pName);
}
2042
/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
        VkInstance                                  instance,
        const char*                                 pName);

/* Thin forwarder to the regular entrypoint lookup above. */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
        VkInstance                                  instance,
        const char*                                 pName)
{
        return radv_GetInstanceProcAddr(instance, pName);
}
2058
/* vkGetDeviceProcAddr: same static-table lookup as the instance variant;
 * the device handle is not consulted. */
PFN_vkVoidFunction radv_GetDeviceProcAddr(
        VkDevice                                    device,
        const char*                                 pName)
{
        return radv_lookup_entrypoint(pName);
}
2065
2066 bool radv_get_memory_fd(struct radv_device *device,
2067                         struct radv_device_memory *memory,
2068                         int *pFD)
2069 {
2070         struct radeon_bo_metadata metadata;
2071
2072         if (memory->image) {
2073                 radv_init_metadata(device, memory->image, &metadata);
2074                 device->ws->buffer_set_metadata(memory->bo, &metadata);
2075         }
2076
2077         return device->ws->buffer_get_fd(device->ws, memory->bo,
2078                                          pFD);
2079 }
2080
2081 VkResult radv_AllocateMemory(
2082         VkDevice                                    _device,
2083         const VkMemoryAllocateInfo*                 pAllocateInfo,
2084         const VkAllocationCallbacks*                pAllocator,
2085         VkDeviceMemory*                             pMem)
2086 {
2087         RADV_FROM_HANDLE(radv_device, device, _device);
2088         struct radv_device_memory *mem;
2089         VkResult result;
2090         enum radeon_bo_domain domain;
2091         uint32_t flags = 0;
2092
2093         assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2094
2095         if (pAllocateInfo->allocationSize == 0) {
2096                 /* Apparently, this is allowed */
2097                 *pMem = VK_NULL_HANDLE;
2098                 return VK_SUCCESS;
2099         }
2100
2101         const VkImportMemoryFdInfoKHX *import_info =
2102                 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHX);
2103         const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info =
2104                 vk_find_struct_const(pAllocateInfo->pNext, DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV);
2105
2106         mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2107                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2108         if (mem == NULL)
2109                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2110
2111         if (dedicate_info) {
2112                 mem->image = radv_image_from_handle(dedicate_info->image);
2113                 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2114         } else {
2115                 mem->image = NULL;
2116                 mem->buffer = NULL;
2117         }
2118
2119         if (import_info) {
2120                 assert(import_info->handleType ==
2121                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
2122                 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2123                                                      NULL, NULL);
2124                 if (!mem->bo)
2125                         goto fail;
2126                 else
2127                         goto out_success;
2128         }
2129
2130         uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2131         if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2132             pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
2133                 domain = RADEON_DOMAIN_GTT;
2134         else
2135                 domain = RADEON_DOMAIN_VRAM;
2136
2137         if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
2138                 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2139         else
2140                 flags |= RADEON_FLAG_CPU_ACCESS;
2141
2142         if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2143                 flags |= RADEON_FLAG_GTT_WC;
2144
2145         mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
2146                                                domain, flags);
2147
2148         if (!mem->bo) {
2149                 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2150                 goto fail;
2151         }
2152         mem->type_index = pAllocateInfo->memoryTypeIndex;
2153 out_success:
2154         *pMem = radv_device_memory_to_handle(mem);
2155
2156         return VK_SUCCESS;
2157
2158 fail:
2159         vk_free2(&device->alloc, pAllocator, mem);
2160
2161         return result;
2162 }
2163
2164 void radv_FreeMemory(
2165         VkDevice                                    _device,
2166         VkDeviceMemory                              _mem,
2167         const VkAllocationCallbacks*                pAllocator)
2168 {
2169         RADV_FROM_HANDLE(radv_device, device, _device);
2170         RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2171
2172         if (mem == NULL)
2173                 return;
2174
2175         device->ws->buffer_destroy(mem->bo);
2176         mem->bo = NULL;
2177
2178         vk_free2(&device->alloc, pAllocator, mem);
2179 }
2180
2181 VkResult radv_MapMemory(
2182         VkDevice                                    _device,
2183         VkDeviceMemory                              _memory,
2184         VkDeviceSize                                offset,
2185         VkDeviceSize                                size,
2186         VkMemoryMapFlags                            flags,
2187         void**                                      ppData)
2188 {
2189         RADV_FROM_HANDLE(radv_device, device, _device);
2190         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2191
2192         if (mem == NULL) {
2193                 *ppData = NULL;
2194                 return VK_SUCCESS;
2195         }
2196
2197         *ppData = device->ws->buffer_map(mem->bo);
2198         if (*ppData) {
2199                 *ppData += offset;
2200                 return VK_SUCCESS;
2201         }
2202
2203         return VK_ERROR_MEMORY_MAP_FAILED;
2204 }
2205
2206 void radv_UnmapMemory(
2207         VkDevice                                    _device,
2208         VkDeviceMemory                              _memory)
2209 {
2210         RADV_FROM_HANDLE(radv_device, device, _device);
2211         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2212
2213         if (mem == NULL)
2214                 return;
2215
2216         device->ws->buffer_unmap(mem->bo);
2217 }
2218
/* vkFlushMappedMemoryRanges: intentionally a no-op — presumably all
 * host-visible memory exposed by this driver is coherent, so there is
 * nothing to flush (NOTE(review): confirm against the memory-type setup). */
VkResult radv_FlushMappedMemoryRanges(
        VkDevice                                    _device,
        uint32_t                                    memoryRangeCount,
        const VkMappedMemoryRange*                  pMemoryRanges)
{
        return VK_SUCCESS;
}
2226
/* vkInvalidateMappedMemoryRanges: intentionally a no-op — see the matching
 * note on radv_FlushMappedMemoryRanges regarding coherent mappings. */
VkResult radv_InvalidateMappedMemoryRanges(
        VkDevice                                    _device,
        uint32_t                                    memoryRangeCount,
        const VkMappedMemoryRange*                  pMemoryRanges)
{
        return VK_SUCCESS;
}
2234
2235 void radv_GetBufferMemoryRequirements(
2236         VkDevice                                    device,
2237         VkBuffer                                    _buffer,
2238         VkMemoryRequirements*                       pMemoryRequirements)
2239 {
2240         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2241
2242         pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2243
2244         if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2245                 pMemoryRequirements->alignment = 4096;
2246         else
2247                 pMemoryRequirements->alignment = 16;
2248
2249         pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2250 }
2251
2252 void radv_GetImageMemoryRequirements(
2253         VkDevice                                    device,
2254         VkImage                                     _image,
2255         VkMemoryRequirements*                       pMemoryRequirements)
2256 {
2257         RADV_FROM_HANDLE(radv_image, image, _image);
2258
2259         pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2260
2261         pMemoryRequirements->size = image->size;
2262         pMemoryRequirements->alignment = image->alignment;
2263 }
2264
/* vkGetImageSparseMemoryRequirements: not implemented — sparse image
 * requirement reporting is still a TODO (stub() logs/aborts per its
 * definition elsewhere). */
void radv_GetImageSparseMemoryRequirements(
        VkDevice                                    device,
        VkImage                                     image,
        uint32_t*                                   pSparseMemoryRequirementCount,
        VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
{
        stub();
}
2273
/* vkGetDeviceMemoryCommitment: always reports zero — no per-allocation
 * commitment tracking is implemented here. */
void radv_GetDeviceMemoryCommitment(
        VkDevice                                    device,
        VkDeviceMemory                              memory,
        VkDeviceSize*                               pCommittedMemoryInBytes)
{
        *pCommittedMemoryInBytes = 0;
}
2281
2282 VkResult radv_BindBufferMemory(
2283         VkDevice                                    device,
2284         VkBuffer                                    _buffer,
2285         VkDeviceMemory                              _memory,
2286         VkDeviceSize                                memoryOffset)
2287 {
2288         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2289         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2290
2291         if (mem) {
2292                 buffer->bo = mem->bo;
2293                 buffer->offset = memoryOffset;
2294         } else {
2295                 buffer->bo = NULL;
2296                 buffer->offset = 0;
2297         }
2298
2299         return VK_SUCCESS;
2300 }
2301
2302 VkResult radv_BindImageMemory(
2303         VkDevice                                    device,
2304         VkImage                                     _image,
2305         VkDeviceMemory                              _memory,
2306         VkDeviceSize                                memoryOffset)
2307 {
2308         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2309         RADV_FROM_HANDLE(radv_image, image, _image);
2310
2311         if (mem) {
2312                 image->bo = mem->bo;
2313                 image->offset = memoryOffset;
2314         } else {
2315                 image->bo = NULL;
2316                 image->offset = 0;
2317         }
2318
2319         return VK_SUCCESS;
2320 }
2321
2322
2323 static void
2324 radv_sparse_buffer_bind_memory(struct radv_device *device,
2325                                const VkSparseBufferMemoryBindInfo *bind)
2326 {
2327         RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2328
2329         for (uint32_t i = 0; i < bind->bindCount; ++i) {
2330                 struct radv_device_memory *mem = NULL;
2331
2332                 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2333                         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2334
2335                 device->ws->buffer_virtual_bind(buffer->bo,
2336                                                 bind->pBinds[i].resourceOffset,
2337                                                 bind->pBinds[i].size,
2338                                                 mem ? mem->bo : NULL,
2339                                                 bind->pBinds[i].memoryOffset);
2340         }
2341 }
2342
2343 static void
2344 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2345                                      const VkSparseImageOpaqueMemoryBindInfo *bind)
2346 {
2347         RADV_FROM_HANDLE(radv_image, image, bind->image);
2348
2349         for (uint32_t i = 0; i < bind->bindCount; ++i) {
2350                 struct radv_device_memory *mem = NULL;
2351
2352                 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2353                         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2354
2355                 device->ws->buffer_virtual_bind(image->bo,
2356                                                 bind->pBinds[i].resourceOffset,
2357                                                 bind->pBinds[i].size,
2358                                                 mem ? mem->bo : NULL,
2359                                                 bind->pBinds[i].memoryOffset);
2360         }
2361 }
2362
2363  VkResult radv_QueueBindSparse(
2364         VkQueue                                     _queue,
2365         uint32_t                                    bindInfoCount,
2366         const VkBindSparseInfo*                     pBindInfo,
2367         VkFence                                     _fence)
2368 {
2369         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2370         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2371         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2372         bool fence_emitted = false;
2373
2374         for (uint32_t i = 0; i < bindInfoCount; ++i) {
2375                 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2376                         radv_sparse_buffer_bind_memory(queue->device,
2377                                                        pBindInfo[i].pBufferBinds + j);
2378                 }
2379
2380                 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2381                         radv_sparse_image_opaque_bind_memory(queue->device,
2382                                                              pBindInfo[i].pImageOpaqueBinds + j);
2383                 }
2384
2385                 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2386                         queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2387                                                      &queue->device->empty_cs[queue->queue_family_index],
2388                                                      1, NULL, NULL,
2389                                                      (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
2390                                                      pBindInfo[i].waitSemaphoreCount,
2391                                                      (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
2392                                                      pBindInfo[i].signalSemaphoreCount,
2393                                                      false, base_fence);
2394                         fence_emitted = true;
2395                         if (fence)
2396                                 fence->submitted = true;
2397                 }
2398         }
2399
2400         if (fence && !fence_emitted) {
2401                 fence->signalled = true;
2402         }
2403
2404         return VK_SUCCESS;
2405 }
2406
2407 VkResult radv_CreateFence(
2408         VkDevice                                    _device,
2409         const VkFenceCreateInfo*                    pCreateInfo,
2410         const VkAllocationCallbacks*                pAllocator,
2411         VkFence*                                    pFence)
2412 {
2413         RADV_FROM_HANDLE(radv_device, device, _device);
2414         struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2415                                                sizeof(*fence), 8,
2416                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2417
2418         if (!fence)
2419                 return VK_ERROR_OUT_OF_HOST_MEMORY;
2420
2421         memset(fence, 0, sizeof(*fence));
2422         fence->submitted = false;
2423         fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2424         fence->fence = device->ws->create_fence();
2425         if (!fence->fence) {
2426                 vk_free2(&device->alloc, pAllocator, fence);
2427                 return VK_ERROR_OUT_OF_HOST_MEMORY;
2428         }
2429
2430         *pFence = radv_fence_to_handle(fence);
2431
2432         return VK_SUCCESS;
2433 }
2434
2435 void radv_DestroyFence(
2436         VkDevice                                    _device,
2437         VkFence                                     _fence,
2438         const VkAllocationCallbacks*                pAllocator)
2439 {
2440         RADV_FROM_HANDLE(radv_device, device, _device);
2441         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2442
2443         if (!fence)
2444                 return;
2445         device->ws->destroy_fence(fence->fence);
2446         vk_free2(&device->alloc, pAllocator, fence);
2447 }
2448
/* Convert a relative timeout in nanoseconds into an absolute
 * CLOCK_MONOTONIC deadline, saturating at UINT64_MAX instead of
 * wrapping on overflow. */
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
        struct timespec tv;

        clock_gettime(CLOCK_MONOTONIC, &tv);

        uint64_t now = (uint64_t)tv.tv_sec * 1000000000ull + (uint64_t)tv.tv_nsec;

        /* Saturating add: now + timeout, clamped to UINT64_MAX. */
        if (timeout > UINT64_MAX - now)
                return UINT64_MAX;

        return now + timeout;
}
2461
2462 VkResult radv_WaitForFences(
2463         VkDevice                                    _device,
2464         uint32_t                                    fenceCount,
2465         const VkFence*                              pFences,
2466         VkBool32                                    waitAll,
2467         uint64_t                                    timeout)
2468 {
2469         RADV_FROM_HANDLE(radv_device, device, _device);
2470         timeout = radv_get_absolute_timeout(timeout);
2471
2472         if (!waitAll && fenceCount > 1) {
2473                 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2474         }
2475
2476         for (uint32_t i = 0; i < fenceCount; ++i) {
2477                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2478                 bool expired = false;
2479
2480                 if (fence->signalled)
2481                         continue;
2482
2483                 if (!fence->submitted)
2484                         return VK_TIMEOUT;
2485
2486                 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2487                 if (!expired)
2488                         return VK_TIMEOUT;
2489
2490                 fence->signalled = true;
2491         }
2492
2493         return VK_SUCCESS;
2494 }
2495
2496 VkResult radv_ResetFences(VkDevice device,
2497                           uint32_t fenceCount,
2498                           const VkFence *pFences)
2499 {
2500         for (unsigned i = 0; i < fenceCount; ++i) {
2501                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2502                 fence->submitted = fence->signalled = false;
2503         }
2504
2505         return VK_SUCCESS;
2506 }
2507
2508 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2509 {
2510         RADV_FROM_HANDLE(radv_device, device, _device);
2511         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2512
2513         if (fence->signalled)
2514                 return VK_SUCCESS;
2515         if (!fence->submitted)
2516                 return VK_NOT_READY;
2517
2518         if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2519                 return VK_NOT_READY;
2520
2521         return VK_SUCCESS;
2522 }
2523
2524
2525 // Queue semaphore functions
2526
2527 VkResult radv_CreateSemaphore(
2528         VkDevice                                    _device,
2529         const VkSemaphoreCreateInfo*                pCreateInfo,
2530         const VkAllocationCallbacks*                pAllocator,
2531         VkSemaphore*                                pSemaphore)
2532 {
2533         RADV_FROM_HANDLE(radv_device, device, _device);
2534         struct radeon_winsys_sem *sem;
2535
2536         sem = device->ws->create_sem(device->ws);
2537         if (!sem)
2538                 return VK_ERROR_OUT_OF_HOST_MEMORY;
2539
2540         *pSemaphore = radeon_winsys_sem_to_handle(sem);
2541         return VK_SUCCESS;
2542 }
2543
2544 void radv_DestroySemaphore(
2545         VkDevice                                    _device,
2546         VkSemaphore                                 _semaphore,
2547         const VkAllocationCallbacks*                pAllocator)
2548 {
2549         RADV_FROM_HANDLE(radv_device, device, _device);
2550         RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2551         if (!_semaphore)
2552                 return;
2553
2554         device->ws->destroy_sem(sem);
2555 }
2556
2557 VkResult radv_CreateEvent(
2558         VkDevice                                    _device,
2559         const VkEventCreateInfo*                    pCreateInfo,
2560         const VkAllocationCallbacks*                pAllocator,
2561         VkEvent*                                    pEvent)
2562 {
2563         RADV_FROM_HANDLE(radv_device, device, _device);
2564         struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2565                                                sizeof(*event), 8,
2566                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2567
2568         if (!event)
2569                 return VK_ERROR_OUT_OF_HOST_MEMORY;
2570
2571         event->bo = device->ws->buffer_create(device->ws, 8, 8,
2572                                               RADEON_DOMAIN_GTT,
2573                                               RADEON_FLAG_CPU_ACCESS);
2574         if (!event->bo) {
2575                 vk_free2(&device->alloc, pAllocator, event);
2576                 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2577         }
2578
2579         event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2580
2581         *pEvent = radv_event_to_handle(event);
2582
2583         return VK_SUCCESS;
2584 }
2585
2586 void radv_DestroyEvent(
2587         VkDevice                                    _device,
2588         VkEvent                                     _event,
2589         const VkAllocationCallbacks*                pAllocator)
2590 {
2591         RADV_FROM_HANDLE(radv_device, device, _device);
2592         RADV_FROM_HANDLE(radv_event, event, _event);
2593
2594         if (!event)
2595                 return;
2596         device->ws->buffer_destroy(event->bo);
2597         vk_free2(&device->alloc, pAllocator, event);
2598 }
2599
2600 VkResult radv_GetEventStatus(
2601         VkDevice                                    _device,
2602         VkEvent                                     _event)
2603 {
2604         RADV_FROM_HANDLE(radv_event, event, _event);
2605
2606         if (*event->map == 1)
2607                 return VK_EVENT_SET;
2608         return VK_EVENT_RESET;
2609 }
2610
/* vkSetEvent (host side): write 1 into the mapped event word. */
VkResult radv_SetEvent(
        VkDevice                                    _device,
        VkEvent                                     _event)
{
        RADV_FROM_HANDLE(radv_event, event, _event);
        *event->map = 1;

        return VK_SUCCESS;
}
2620
/* vkResetEvent (host side): write 0 into the mapped event word. */
VkResult radv_ResetEvent(
    VkDevice                                    _device,
    VkEvent                                     _event)
{
        RADV_FROM_HANDLE(radv_event, event, _event);
        *event->map = 0;

        return VK_SUCCESS;
}
2630
2631 VkResult radv_CreateBuffer(
2632         VkDevice                                    _device,
2633         const VkBufferCreateInfo*                   pCreateInfo,
2634         const VkAllocationCallbacks*                pAllocator,
2635         VkBuffer*                                   pBuffer)
2636 {
2637         RADV_FROM_HANDLE(radv_device, device, _device);
2638         struct radv_buffer *buffer;
2639
2640         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2641
2642         buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2643                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2644         if (buffer == NULL)
2645                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2646
2647         buffer->size = pCreateInfo->size;
2648         buffer->usage = pCreateInfo->usage;
2649         buffer->bo = NULL;
2650         buffer->offset = 0;
2651         buffer->flags = pCreateInfo->flags;
2652
2653         if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2654                 buffer->bo = device->ws->buffer_create(device->ws,
2655                                                        align64(buffer->size, 4096),
2656                                                        4096, 0, RADEON_FLAG_VIRTUAL);
2657                 if (!buffer->bo) {
2658                         vk_free2(&device->alloc, pAllocator, buffer);
2659                         return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2660                 }
2661         }
2662
2663         *pBuffer = radv_buffer_to_handle(buffer);
2664
2665         return VK_SUCCESS;
2666 }
2667
2668 void radv_DestroyBuffer(
2669         VkDevice                                    _device,
2670         VkBuffer                                    _buffer,
2671         const VkAllocationCallbacks*                pAllocator)
2672 {
2673         RADV_FROM_HANDLE(radv_device, device, _device);
2674         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2675
2676         if (!buffer)
2677                 return;
2678
2679         if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2680                 device->ws->buffer_destroy(buffer->bo);
2681
2682         vk_free2(&device->alloc, pAllocator, buffer);
2683 }
2684
2685 static inline unsigned
2686 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2687 {
2688         if (stencil)
2689                 return image->surface.u.legacy.stencil_tiling_index[level];
2690         else
2691                 return image->surface.u.legacy.tiling_index[level];
2692 }
2693
2694 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2695 {
2696         return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2697 }
2698
/* Fill *cb with the CB_COLOR* register values needed to bind iview as a
 * color attachment.  Handles both the GFX9 (swizzle-mode based) and the
 * pre-GFX9 (legacy tiling-index based) register layouts.
 */
static void
radv_initialise_color_surface(struct radv_device *device,
                              struct radv_color_buffer_info *cb,
                              struct radv_image_view *iview)
{
        const struct vk_format_description *desc;
        unsigned ntype, format, swap, endian;
        unsigned blend_clamp = 0, blend_bypass = 0;
        uint64_t va;
        const struct radeon_surf *surf = &iview->image->surface;

        desc = vk_format_description(iview->vk_format);

        memset(cb, 0, sizeof(*cb));

        /* Intensity is implemented as Red, so treat it that way. */
        cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);

        /* GPU virtual address of the image within its bound BO. */
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;

        if (device->physical_device->rad_info.chip_class >= GFX9) {
                /* Take the RB/pipe alignment flags from the DCC metadata if
                 * the image has DCC, otherwise from the CMASK metadata. */
                struct gfx9_surf_meta_flags meta;
                if (iview->image->dcc_offset)
                        meta = iview->image->surface.u.gfx9.dcc;
                else
                        meta = iview->image->surface.u.gfx9.cmask;

                cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
                        S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
                        S_028C74_RB_ALIGNED(meta.rb_aligned) |
                        S_028C74_PIPE_ALIGNED(meta.pipe_aligned);

                /* NOTE(review): surf_offset is shifted right by 8 here, yet
                 * va is shifted right by 8 again below when written to
                 * cb_color_base — confirm this double shift is intended. */
                va += iview->image->surface.u.gfx9.surf_offset >> 8;
        } else {
                const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
                unsigned pitch_tile_max, slice_tile_max, tile_mode_index;

                va += level_info->offset;

                /* Pitch/slice are programmed in units of 8x8 tiles, minus one. */
                pitch_tile_max = level_info->nblk_x / 8 - 1;
                slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
                tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);

                cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
                cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
                cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

                cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
                cb->micro_tile_mode = iview->image->surface.micro_tile_mode;

                if (iview->image->fmask.size) {
                        if (device->physical_device->rad_info.chip_class >= CIK)
                                cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
                        cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
                        cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
                } else {
                        /* This must be set for fast clear to work without FMASK. */
                        if (device->physical_device->rad_info.chip_class >= CIK)
                                cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
                        cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
                        cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
                }
        }

        /* Base registers hold 256-byte-aligned addresses (byte address >> 8). */
        cb->cb_color_base = va >> 8;

        /* CMASK variables */
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        va += iview->image->cmask.offset;
        cb->cb_color_cmask = va >> 8;

        /* DCC base address. */
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        va += iview->image->dcc_offset;
        cb->cb_dcc_base = va >> 8;

        /* Restrict rendering to the view's layer range. */
        uint32_t max_slice = radv_surface_layer_count(iview);
        cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
                S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);

        if (iview->image->info.samples > 1) {
                unsigned log_samples = util_logbase2(iview->image->info.samples);

                cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
                        S_028C74_NUM_FRAGMENTS(log_samples);
        }

        if (iview->image->fmask.size) {
                va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
                cb->cb_color_fmask = va >> 8;
        } else {
                /* Without an FMASK surface, point the FMASK base at the
                 * color base. */
                cb->cb_color_fmask = cb->cb_color_base;
        }

        ntype = radv_translate_color_numformat(iview->vk_format,
                                               desc,
                                               vk_format_get_first_non_void_channel(iview->vk_format));
        format = radv_translate_colorformat(iview->vk_format);
        if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
                radv_finishme("Illegal color\n");
        swap = radv_translate_colorswap(iview->vk_format, FALSE);
        endian = radv_colorformat_endian_swap(format);

        /* blend clamp should be set for all NORM/SRGB types */
        if (ntype == V_028C70_NUMBER_UNORM ||
            ntype == V_028C70_NUMBER_SNORM ||
            ntype == V_028C70_NUMBER_SRGB)
                blend_clamp = 1;

        /* set blend bypass according to docs if SINT/UINT or
           8/24 COLOR variants */
        if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
            format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
            format == V_028C70_COLOR_X24_8_32_FLOAT) {
                blend_clamp = 0;
                blend_bypass = 1;
        }
#if 0
        if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
            (format == V_028C70_COLOR_8 ||
             format == V_028C70_COLOR_8_8 ||
             format == V_028C70_COLOR_8_8_8_8))
                ->color_is_int8 = true;
#endif
        cb->cb_color_info = S_028C70_FORMAT(format) |
                S_028C70_COMP_SWAP(swap) |
                S_028C70_BLEND_CLAMP(blend_clamp) |
                S_028C70_BLEND_BYPASS(blend_bypass) |
                S_028C70_SIMPLE_FLOAT(1) |
                S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
                                    ntype != V_028C70_NUMBER_SNORM &&
                                    ntype != V_028C70_NUMBER_SRGB &&
                                    format != V_028C70_COLOR_8_24 &&
                                    format != V_028C70_COLOR_24_8) |
                S_028C70_NUMBER_TYPE(ntype) |
                S_028C70_ENDIAN(endian);
        /* Enable MSAA color compression only when an FMASK surface exists. */
        if (iview->image->info.samples > 1)
                if (iview->image->fmask.size)
                        cb->cb_color_info |= S_028C70_COMPRESSION(1);

        if (iview->image->cmask.size &&
            !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
                cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

        /* Enable DCC only when this mip level has DCC metadata. */
        if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
                cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

        if (device->physical_device->rad_info.chip_class >= VI) {
                /* Smaller uncompressed block sizes for small-bpe MSAA surfaces. */
                unsigned max_uncompressed_block_size = 2;
                if (iview->image->info.samples > 1) {
                        if (iview->image->surface.bpe == 1)
                                max_uncompressed_block_size = 0;
                        else if (iview->image->surface.bpe == 2)
                                max_uncompressed_block_size = 1;
                }

                cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
                        S_028C78_INDEPENDENT_64B_BLOCKS(1);
        }

        /* This must be set for fast clear to work without FMASK. */
        if (!iview->image->fmask.size &&
            device->physical_device->rad_info.chip_class == SI) {
                unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
                cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
        }

        if (device->physical_device->rad_info.chip_class >= GFX9) {
                /* GFX9 packs mip/extent information into the attrib words. */
                uint32_t max_slice = radv_surface_layer_count(iview);
                unsigned mip0_depth = iview->base_layer + max_slice - 1;

                cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
                cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
                        S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
                cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
                        S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
                        S_028C68_MAX_MIP(iview->image->info.levels);

                cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);

        }
}
2880
/* Fill *ds with the DB_* register values needed to bind iview as a
 * depth/stencil attachment, for both the GFX9 and the legacy (pre-GFX9)
 * register layouts.
 */
static void
radv_initialise_ds_surface(struct radv_device *device,
                           struct radv_ds_buffer_info *ds,
                           struct radv_image_view *iview)
{
        unsigned level = iview->base_mip;
        unsigned format, stencil_format;
        uint64_t va, s_offs, z_offs;
        bool stencil_only = false;
        memset(ds, 0, sizeof(*ds));
        /* Polygon-offset scaling depends on the depth format's bit depth. */
        switch (iview->vk_format) {
        case VK_FORMAT_D24_UNORM_S8_UINT:
        case VK_FORMAT_X8_D24_UNORM_PACK32:
                ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
                ds->offset_scale = 2.0f;
                break;
        case VK_FORMAT_D16_UNORM:
        case VK_FORMAT_D16_UNORM_S8_UINT:
                ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
                ds->offset_scale = 4.0f;
                break;
        case VK_FORMAT_D32_SFLOAT:
        case VK_FORMAT_D32_SFLOAT_S8_UINT:
                ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
                        S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
                ds->offset_scale = 1.0f;
                break;
        case VK_FORMAT_S8_UINT:
                stencil_only = true;
                break;
        default:
                break;
        }

        format = radv_translate_dbformat(iview->vk_format);
        stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ?
                V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

        /* Restrict rendering to the view's layer range. */
        uint32_t max_slice = radv_surface_layer_count(iview);
        ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
                S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);

        ds->db_htile_data_base = 0;
        ds->db_htile_surface = 0;

        /* Start depth and stencil offsets at the image's GPU address. */
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        s_offs = z_offs = va;

        if (device->physical_device->rad_info.chip_class >= GFX9) {
                assert(iview->image->surface.u.gfx9.surf_offset == 0);
                s_offs += iview->image->surface.u.gfx9.stencil_offset;

                ds->db_z_info = S_028038_FORMAT(format) |
                        S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
                        S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
                        S_028038_MAXMIP(iview->image->info.levels - 1);
                ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
                        S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);

                ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
                ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
                ds->db_depth_view |= S_028008_MIPID(level);

                ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
                        S_02801C_Y_MAX(iview->image->info.height - 1);

                /* Only use HTILE for the first level. */
                if (iview->image->surface.htile_size && !level) {
                        ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

                        if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
                                /* Use all of the htile_buffer for depth if there's no stencil. */
                                ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
                        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
                                iview->image->htile_offset;
                        ds->db_htile_data_base = va >> 8;
                        ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
                                S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
                                S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
                }
        } else {
                /* Legacy (GFX6-8) path. */
                const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];

                if (stencil_only)
                        level_info = &iview->image->surface.u.legacy.stencil_level[level];

                z_offs += iview->image->surface.u.legacy.level[level].offset;
                s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;

                ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
                ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
                ds->db_stencil_info = S_028044_FORMAT(stencil_format);

                if (iview->image->info.samples > 1)
                        ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

                if (device->physical_device->rad_info.chip_class >= CIK) {
                        /* CIK+ programs explicit tiling parameters taken from
                         * the SI tile mode and CIK macrotile mode arrays. */
                        struct radeon_info *info = &device->physical_device->rad_info;
                        unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
                        unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
                        unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
                        unsigned tile_mode = info->si_tile_mode_array[tiling_index];
                        unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
                        unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

                        if (stencil_only)
                                tile_mode = stencil_tile_mode;

                        ds->db_depth_info |=
                                S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
                                S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
                                S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
                                S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
                                S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
                                S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
                        ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
                        ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
                } else {
                        /* SI uses tile mode indices instead. */
                        unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
                        ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
                        tile_mode_index = si_tile_mode_index(iview->image, level, true);
                        ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
                }

                /* Pitch/height/slice are programmed in units of 8x8 tiles, minus one. */
                ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
                        S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
                ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

                /* Only use HTILE for the first level. */
                if (iview->image->surface.htile_size && !level) {
                        ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

                        if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
                                /* Use all of the htile_buffer for depth if there's no stencil. */
                                ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

                        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
                                iview->image->htile_offset;
                        ds->db_htile_data_base = va >> 8;
                        ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
                }
        }

        /* Base registers hold 256-byte-aligned addresses (byte address >> 8). */
        ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
        ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}
3026
3027 VkResult radv_CreateFramebuffer(
3028         VkDevice                                    _device,
3029         const VkFramebufferCreateInfo*              pCreateInfo,
3030         const VkAllocationCallbacks*                pAllocator,
3031         VkFramebuffer*                              pFramebuffer)
3032 {
3033         RADV_FROM_HANDLE(radv_device, device, _device);
3034         struct radv_framebuffer *framebuffer;
3035
3036         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3037
3038         size_t size = sizeof(*framebuffer) +
3039                 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3040         framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3041                                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3042         if (framebuffer == NULL)
3043                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3044
3045         framebuffer->attachment_count = pCreateInfo->attachmentCount;
3046         framebuffer->width = pCreateInfo->width;
3047         framebuffer->height = pCreateInfo->height;
3048         framebuffer->layers = pCreateInfo->layers;
3049         for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3050                 VkImageView _iview = pCreateInfo->pAttachments[i];
3051                 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3052                 framebuffer->attachments[i].attachment = iview;
3053                 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3054                         radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3055                 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3056                         radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3057                 }
3058                 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3059                 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3060                 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
3061         }
3062
3063         *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3064         return VK_SUCCESS;
3065 }
3066
3067 void radv_DestroyFramebuffer(
3068         VkDevice                                    _device,
3069         VkFramebuffer                               _fb,
3070         const VkAllocationCallbacks*                pAllocator)
3071 {
3072         RADV_FROM_HANDLE(radv_device, device, _device);
3073         RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3074
3075         if (!fb)
3076                 return;
3077         vk_free2(&device->alloc, pAllocator, fb);
3078 }
3079
3080 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3081 {
3082         switch (address_mode) {
3083         case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3084                 return V_008F30_SQ_TEX_WRAP;
3085         case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3086                 return V_008F30_SQ_TEX_MIRROR;
3087         case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3088                 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3089         case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3090                 return V_008F30_SQ_TEX_CLAMP_BORDER;
3091         case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3092                 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3093         default:
3094                 unreachable("illegal tex wrap mode");
3095                 break;
3096         }
3097 }
3098
3099 static unsigned
3100 radv_tex_compare(VkCompareOp op)
3101 {
3102         switch (op) {
3103         case VK_COMPARE_OP_NEVER:
3104                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3105         case VK_COMPARE_OP_LESS:
3106                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3107         case VK_COMPARE_OP_EQUAL:
3108                 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3109         case VK_COMPARE_OP_LESS_OR_EQUAL:
3110                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3111         case VK_COMPARE_OP_GREATER:
3112                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3113         case VK_COMPARE_OP_NOT_EQUAL:
3114                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3115         case VK_COMPARE_OP_GREATER_OR_EQUAL:
3116                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3117         case VK_COMPARE_OP_ALWAYS:
3118                 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3119         default:
3120                 unreachable("illegal compare mode");
3121                 break;
3122         }
3123 }
3124
3125 static unsigned
3126 radv_tex_filter(VkFilter filter, unsigned max_ansio)
3127 {
3128         switch (filter) {
3129         case VK_FILTER_NEAREST:
3130                 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3131                         V_008F38_SQ_TEX_XY_FILTER_POINT);
3132         case VK_FILTER_LINEAR:
3133                 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3134                         V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3135         case VK_FILTER_CUBIC_IMG:
3136         default:
3137                 fprintf(stderr, "illegal texture filter");
3138                 return 0;
3139         }
3140 }
3141
3142 static unsigned
3143 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3144 {
3145         switch (mode) {
3146         case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3147                 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3148         case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3149                 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3150         default:
3151                 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3152         }
3153 }
3154
3155 static unsigned
3156 radv_tex_bordercolor(VkBorderColor bcolor)
3157 {
3158         switch (bcolor) {
3159         case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3160         case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3161                 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3162         case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3163         case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3164                 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3165         case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3166         case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3167                 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3168         default:
3169                 break;
3170         }
3171         return 0;
3172 }
3173
/* Encode a max-anisotropy value as the hardware ratio field:
 * 0 -> 1x, 1 -> 2x, 2 -> 4x, 3 -> 8x, 4 -> 16x (clamped). */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	unsigned ratio;

	for (ratio = 0; ratio < 4; ratio++) {
		if (filter < (2u << ratio))
			break;
	}
	return ratio;
}
3187
/* Translate a VkSamplerCreateInfo into the four 32-bit hardware sampler
 * words stored in sampler->state.
 */
static void
radv_init_sampler(struct radv_device *device,
		  struct radv_sampler *sampler,
		  const VkSamplerCreateInfo *pCreateInfo)
{
	/* Anisotropy is honoured only when enabled and greater than 1x. */
	uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
					(uint32_t) pCreateInfo->maxAnisotropy : 0;
	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
	bool is_vi = (device->physical_device->rad_info.chip_class >= VI);

	/* Word 0: address modes, anisotropy, depth compare, coordinate mode. */
	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			     S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
			     S_008F30_DISABLE_CUBE_WRAP(0) |
			     S_008F30_COMPAT_MODE(is_vi));
	/* Word 1: LOD range in 4.8 fixed point, clamped to [0, 15]. */
	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	/* Word 2: LOD bias (signed 4.8 fixed point) and filter selection. */
	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
			     S_008F38_MIP_POINT_PRECLAMP(0) |
			     S_008F38_DISABLE_LSB_CEIL(1) |
			     S_008F38_FILTER_PREC_FIX(1) |
			     S_008F38_ANISO_OVERRIDE(is_vi));
	/* Word 3: border color selection. */
	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
}
3222
3223 VkResult radv_CreateSampler(
3224         VkDevice                                    _device,
3225         const VkSamplerCreateInfo*                  pCreateInfo,
3226         const VkAllocationCallbacks*                pAllocator,
3227         VkSampler*                                  pSampler)
3228 {
3229         RADV_FROM_HANDLE(radv_device, device, _device);
3230         struct radv_sampler *sampler;
3231
3232         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3233
3234         sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3235                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3236         if (!sampler)
3237                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3238
3239         radv_init_sampler(device, sampler, pCreateInfo);
3240         *pSampler = radv_sampler_to_handle(sampler);
3241
3242         return VK_SUCCESS;
3243 }
3244
3245 void radv_DestroySampler(
3246         VkDevice                                    _device,
3247         VkSampler                                   _sampler,
3248         const VkAllocationCallbacks*                pAllocator)
3249 {
3250         RADV_FROM_HANDLE(radv_device, device, _device);
3251         RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3252
3253         if (!sampler)
3254                 return;
3255         vk_free2(&device->alloc, pAllocator, sampler);
3256 }
3257
3258 /* vk_icd.h does not declare this function, so we declare it here to
3259  * suppress Wmissing-prototypes.
3260  */
3261 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3262 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3263
3264 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3265 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3266 {
3267         /* For the full details on loader interface versioning, see
3268         * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3269         * What follows is a condensed summary, to help you navigate the large and
3270         * confusing official doc.
3271         *
3272         *   - Loader interface v0 is incompatible with later versions. We don't
3273         *     support it.
3274         *
3275         *   - In loader interface v1:
3276         *       - The first ICD entrypoint called by the loader is
3277         *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3278         *         entrypoint.
3279         *       - The ICD must statically expose no other Vulkan symbol unless it is
3280         *         linked with -Bsymbolic.
3281         *       - Each dispatchable Vulkan handle created by the ICD must be
3282         *         a pointer to a struct whose first member is VK_LOADER_DATA. The
3283         *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3284         *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3285         *         vkDestroySurfaceKHR(). The ICD must be capable of working with
3286         *         such loader-managed surfaces.
3287         *
3288         *    - Loader interface v2 differs from v1 in:
3289         *       - The first ICD entrypoint called by the loader is
3290         *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3291         *         statically expose this entrypoint.
3292         *
3293         *    - Loader interface v3 differs from v2 in:
3294         *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3295         *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
3296         *          because the loader no longer does so.
3297         */
3298         *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3299         return VK_SUCCESS;
3300 }
3301
3302 VkResult radv_GetMemoryFdKHX(VkDevice _device,
3303                              VkDeviceMemory _memory,
3304                              VkExternalMemoryHandleTypeFlagsKHX handleType,
3305                              int *pFD)
3306 {
3307         RADV_FROM_HANDLE(radv_device, device, _device);
3308         RADV_FROM_HANDLE(radv_device_memory, memory, _memory);
3309
3310         /* We support only one handle type. */
3311         assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
3312
3313         bool ret = radv_get_memory_fd(device, memory, pFD);
3314         if (ret == false)
3315                 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3316         return VK_SUCCESS;
3317 }
3318
3319 VkResult radv_GetMemoryFdPropertiesKHX(VkDevice _device,
3320                                        VkExternalMemoryHandleTypeFlagBitsKHX handleType,
3321                                        int fd,
3322                                        VkMemoryFdPropertiesKHX *pMemoryFdProperties)
3323 {
3324    /* The valid usage section for this function says:
3325     *
3326     *    "handleType must not be one of the handle types defined as opaque."
3327     *
3328     * Since we only handle opaque handles for now, there are no FD properties.
3329     */
3330    return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX;
3331 }