OSDN Git Service

radv: clean up radv_vi_dcc_enabled()
[android-x86/external-mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_shader.h"
35 #include "radv_cs.h"
36 #include "util/disk_cache.h"
37 #include "util/strtod.h"
38 #include "vk_util.h"
39 #include <xf86drm.h>
40 #include <amdgpu.h>
41 #include <amdgpu_drm.h>
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "gfx9d.h"
47 #include "addrlib/gfx9/chip/gfx9_enum.h"
48 #include "util/debug.h"
49
50 static int
51 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
52 {
53         uint32_t mesa_timestamp, llvm_timestamp;
54         uint16_t f = family;
55         memset(uuid, 0, VK_UUID_SIZE);
56         if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
57             !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
58                 return -1;
59
60         memcpy(uuid, &mesa_timestamp, 4);
61         memcpy((char*)uuid + 4, &llvm_timestamp, 4);
62         memcpy((char*)uuid + 8, &f, 2);
63         snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
64         return 0;
65 }
66
67 static void
68 radv_get_driver_uuid(void *uuid)
69 {
70         ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
71 }
72
73 static void
74 radv_get_device_uuid(struct radeon_info *info, void *uuid)
75 {
76         ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
77 }
78
79 static void
80 radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
81 {
82         const char *chip_string;
83         char llvm_string[32] = {};
84
85         switch (family) {
86         case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
87         case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
88         case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
89         case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
90         case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
91         case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
92         case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
93         case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
94         case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
95         case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
96         case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
97         case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
98         case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
99         case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
100         case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
101         case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
102         case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
103         case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
104         case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
105         case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
106         case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
107         default: chip_string = "AMD RADV unknown"; break;
108         }
109
110         if (HAVE_LLVM > 0) {
111                 snprintf(llvm_string, sizeof(llvm_string),
112                          " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
113                          HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
114         }
115
116         snprintf(name, name_len, "%s%s", chip_string, llvm_string);
117 }
118
119 static void
120 radv_physical_device_init_mem_types(struct radv_physical_device *device)
121 {
122         STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
123         uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
124                                           device->rad_info.vram_vis_size);
125
126         int vram_index = -1, visible_vram_index = -1, gart_index = -1;
127         device->memory_properties.memoryHeapCount = 0;
128         if (device->rad_info.vram_size - visible_vram_size > 0) {
129                 vram_index = device->memory_properties.memoryHeapCount++;
130                 device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
131                         .size = device->rad_info.vram_size - visible_vram_size,
132                         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
133                 };
134         }
135         if (visible_vram_size) {
136                 visible_vram_index = device->memory_properties.memoryHeapCount++;
137                 device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
138                         .size = visible_vram_size,
139                         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
140                 };
141         }
142         if (device->rad_info.gart_size > 0) {
143                 gart_index = device->memory_properties.memoryHeapCount++;
144                 device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
145                         .size = device->rad_info.gart_size,
146                         .flags = 0,
147                 };
148         }
149
150         STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
151         unsigned type_count = 0;
152         if (vram_index >= 0) {
153                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
154                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
155                         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
156                         .heapIndex = vram_index,
157                 };
158         }
159         if (gart_index >= 0) {
160                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
161                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
162                         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
163                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
164                         .heapIndex = gart_index,
165                 };
166         }
167         if (visible_vram_index >= 0) {
168                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
169                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
170                         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
171                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
172                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
173                         .heapIndex = visible_vram_index,
174                 };
175         }
176         if (gart_index >= 0) {
177                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
178                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
179                         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
180                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
181                         VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
182                         .heapIndex = gart_index,
183                 };
184         }
185         device->memory_properties.memoryTypeCount = type_count;
186 }
187
188 static void
189 radv_handle_env_var_force_family(struct radv_physical_device *device)
190 {
191         const char *family = getenv("RADV_FORCE_FAMILY");
192         unsigned i;
193
194         if (!family)
195                 return;
196
197         for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
198                 if (!strcmp(family, ac_get_llvm_processor_name(i))) {
199                         /* Override family and chip_class. */
200                         device->rad_info.family = i;
201
202                         if (i >= CHIP_VEGA10)
203                                 device->rad_info.chip_class = GFX9;
204                         else if (i >= CHIP_TONGA)
205                                 device->rad_info.chip_class = VI;
206                         else if (i >= CHIP_BONAIRE)
207                                 device->rad_info.chip_class = CIK;
208                         else
209                                 device->rad_info.chip_class = SI;
210
211                         return;
212                 }
213         }
214
215         fprintf(stderr, "radv: Unknown family: %s\n", family);
216         exit(1);
217 }
218
219 static VkResult
220 radv_physical_device_init(struct radv_physical_device *device,
221                           struct radv_instance *instance,
222                           drmDevicePtr drm_device)
223 {
224         const char *path = drm_device->nodes[DRM_NODE_RENDER];
225         VkResult result;
226         drmVersionPtr version;
227         int fd;
228
229         fd = open(path, O_RDWR | O_CLOEXEC);
230         if (fd < 0)
231                 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
232
233         version = drmGetVersion(fd);
234         if (!version) {
235                 close(fd);
236                 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
237                                  "failed to get version %s: %m", path);
238         }
239
240         if (strcmp(version->name, "amdgpu")) {
241                 drmFreeVersion(version);
242                 close(fd);
243                 return VK_ERROR_INCOMPATIBLE_DRIVER;
244         }
245         drmFreeVersion(version);
246
247         device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
248         device->instance = instance;
249         assert(strlen(path) < ARRAY_SIZE(device->path));
250         strncpy(device->path, path, ARRAY_SIZE(device->path));
251
252         device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
253                                                instance->perftest_flags);
254         if (!device->ws) {
255                 result = VK_ERROR_INCOMPATIBLE_DRIVER;
256                 goto fail;
257         }
258
259         device->local_fd = fd;
260         device->ws->query_info(device->ws, &device->rad_info);
261
262         radv_handle_env_var_force_family(device);
263
264         radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
265
266         if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
267                 device->ws->destroy(device->ws);
268                 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
269                                    "cannot generate UUID");
270                 goto fail;
271         }
272
273         /* These flags affect shader compilation. */
274         uint64_t shader_env_flags =
275                 (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
276                 (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
277
278         /* The gpu id is already embeded in the uuid so we just pass "radv"
279          * when creating the cache.
280          */
281         char buf[VK_UUID_SIZE * 2 + 1];
282         disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
283         device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
284
285         if (device->rad_info.chip_class < VI ||
286             device->rad_info.chip_class > GFX9)
287                 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
288
289         radv_get_driver_uuid(&device->device_uuid);
290         radv_get_device_uuid(&device->rad_info, &device->device_uuid);
291
292         if (device->rad_info.family == CHIP_STONEY ||
293             device->rad_info.chip_class >= GFX9) {
294                 device->has_rbplus = true;
295                 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
296                                          device->rad_info.family == CHIP_VEGA12;
297         }
298
299         /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs
300          * on SI.
301          */
302         device->has_clear_state = device->rad_info.chip_class >= CIK;
303
304         device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;
305
306         /* Vega10/Raven need a special workaround for a hardware bug. */
307         device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
308                                   device->rad_info.family == CHIP_RAVEN;
309
310         /* Out-of-order primitive rasterization. */
311         device->has_out_of_order_rast = device->rad_info.chip_class >= VI &&
312                                         device->rad_info.max_se >= 2;
313         device->out_of_order_rast_allowed = device->has_out_of_order_rast &&
314                                             (device->instance->perftest_flags & RADV_PERFTEST_OUT_OF_ORDER);
315
316         radv_physical_device_init_mem_types(device);
317         radv_fill_device_extension_table(device, &device->supported_extensions);
318
319         result = radv_init_wsi(device);
320         if (result != VK_SUCCESS) {
321                 device->ws->destroy(device->ws);
322                 goto fail;
323         }
324
325         return VK_SUCCESS;
326
327 fail:
328         close(fd);
329         return result;
330 }
331
332 static void
333 radv_physical_device_finish(struct radv_physical_device *device)
334 {
335         radv_finish_wsi(device);
336         device->ws->destroy(device->ws);
337         disk_cache_destroy(device->disk_cache);
338         close(device->local_fd);
339 }
340
341 static void *
342 default_alloc_func(void *pUserData, size_t size, size_t align,
343                    VkSystemAllocationScope allocationScope)
344 {
345         return malloc(size);
346 }
347
348 static void *
349 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
350                      size_t align, VkSystemAllocationScope allocationScope)
351 {
352         return realloc(pOriginal, size);
353 }
354
/*
 * Default pfnFree callback: forwards to free(). The user data pointer is
 * ignored.
 */
static void
default_free_func(void *pUserData, void *pMemory)
{
        (void)pUserData;

        free(pMemory);
}
360
361 static const VkAllocationCallbacks default_alloc = {
362         .pUserData = NULL,
363         .pfnAllocation = default_alloc_func,
364         .pfnReallocation = default_realloc_func,
365         .pfnFree = default_free_func,
366 };
367
368 static const struct debug_control radv_debug_options[] = {
369         {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
370         {"nodcc", RADV_DEBUG_NO_DCC},
371         {"shaders", RADV_DEBUG_DUMP_SHADERS},
372         {"nocache", RADV_DEBUG_NO_CACHE},
373         {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
374         {"nohiz", RADV_DEBUG_NO_HIZ},
375         {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
376         {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
377         {"allbos", RADV_DEBUG_ALL_BOS},
378         {"noibs", RADV_DEBUG_NO_IBS},
379         {"spirv", RADV_DEBUG_DUMP_SPIRV},
380         {"vmfaults", RADV_DEBUG_VM_FAULTS},
381         {"zerovram", RADV_DEBUG_ZERO_VRAM},
382         {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
383         {"nosisched", RADV_DEBUG_NO_SISCHED},
384         {"preoptir", RADV_DEBUG_PREOPTIR},
385         {NULL, 0}
386 };
387
388 const char *
389 radv_get_debug_option_name(int id)
390 {
391         assert(id < ARRAY_SIZE(radv_debug_options) - 1);
392         return radv_debug_options[id].string;
393 }
394
395 static const struct debug_control radv_perftest_options[] = {
396         {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
397         {"sisched", RADV_PERFTEST_SISCHED},
398         {"localbos", RADV_PERFTEST_LOCAL_BOS},
399         {"binning", RADV_PERFTEST_BINNING},
400         {"outoforderrast", RADV_PERFTEST_OUT_OF_ORDER},
401         {NULL, 0}
402 };
403
404 const char *
405 radv_get_perftest_option_name(int id)
406 {
407         assert(id < ARRAY_SIZE(radv_debug_options) - 1);
408         return radv_perftest_options[id].string;
409 }
410
411 static void
412 radv_handle_per_app_options(struct radv_instance *instance,
413                             const VkApplicationInfo *info)
414 {
415         const char *name = info ? info->pApplicationName : NULL;
416
417         if (!name)
418                 return;
419
420         if (!strcmp(name, "Talos - Linux - 32bit") ||
421             !strcmp(name, "Talos - Linux - 64bit")) {
422                 /* Force enable LLVM sisched for Talos because it looks safe
423                  * and it gives few more FPS.
424                  */
425                 instance->perftest_flags |= RADV_PERFTEST_SISCHED;
426         }
427 }
428
429 static int radv_get_instance_extension_index(const char *name)
430 {
431         for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
432                 if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
433                         return i;
434         }
435         return -1;
436 }
437
438
439 VkResult radv_CreateInstance(
440         const VkInstanceCreateInfo*                 pCreateInfo,
441         const VkAllocationCallbacks*                pAllocator,
442         VkInstance*                                 pInstance)
443 {
444         struct radv_instance *instance;
445         VkResult result;
446
447         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
448
449         uint32_t client_version;
450         if (pCreateInfo->pApplicationInfo &&
451             pCreateInfo->pApplicationInfo->apiVersion != 0) {
452                 client_version = pCreateInfo->pApplicationInfo->apiVersion;
453         } else {
454                 client_version = VK_MAKE_VERSION(1, 0, 0);
455         }
456
457         if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
458             client_version > VK_MAKE_VERSION(1, 1, 0xfff)) {
459                 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
460                                  "Client requested version %d.%d.%d",
461                                  VK_VERSION_MAJOR(client_version),
462                                  VK_VERSION_MINOR(client_version),
463                                  VK_VERSION_PATCH(client_version));
464         }
465
466         instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
467                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
468         if (!instance)
469                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
470
471         instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
472
473         if (pAllocator)
474                 instance->alloc = *pAllocator;
475         else
476                 instance->alloc = default_alloc;
477
478         instance->apiVersion = client_version;
479         instance->physicalDeviceCount = -1;
480
481         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
482                 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
483                 int index = radv_get_instance_extension_index(ext_name);
484
485                 if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
486                         vk_free2(&default_alloc, pAllocator, instance);
487                         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
488                 }
489
490                 instance->enabled_extensions.extensions[index] = true;
491         }
492
493         result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
494         if (result != VK_SUCCESS) {
495                 vk_free2(&default_alloc, pAllocator, instance);
496                 return vk_error(result);
497         }
498
499         _mesa_locale_init();
500
501         VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
502
503         instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
504                                                    radv_debug_options);
505
506         instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
507                                                    radv_perftest_options);
508
509         radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
510
511         if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
512                 /* Disable sisched when the user requests it, this is mostly
513                  * useful when the driver force-enable sisched for the given
514                  * application.
515                  */
516                 instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
517         }
518
519         *pInstance = radv_instance_to_handle(instance);
520
521         return VK_SUCCESS;
522 }
523
524 void radv_DestroyInstance(
525         VkInstance                                  _instance,
526         const VkAllocationCallbacks*                pAllocator)
527 {
528         RADV_FROM_HANDLE(radv_instance, instance, _instance);
529
530         if (!instance)
531                 return;
532
533         for (int i = 0; i < instance->physicalDeviceCount; ++i) {
534                 radv_physical_device_finish(instance->physicalDevices + i);
535         }
536
537         VG(VALGRIND_DESTROY_MEMPOOL(instance));
538
539         _mesa_locale_fini();
540
541         vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
542
543         vk_free(&instance->alloc, instance);
544 }
545
546 static VkResult
547 radv_enumerate_devices(struct radv_instance *instance)
548 {
549         /* TODO: Check for more devices ? */
550         drmDevicePtr devices[8];
551         VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
552         int max_devices;
553
554         instance->physicalDeviceCount = 0;
555
556         max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
557         if (max_devices < 1)
558                 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
559
560         for (unsigned i = 0; i < (unsigned)max_devices; i++) {
561                 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
562                     devices[i]->bustype == DRM_BUS_PCI &&
563                     devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
564
565                         result = radv_physical_device_init(instance->physicalDevices +
566                                                            instance->physicalDeviceCount,
567                                                            instance,
568                                                            devices[i]);
569                         if (result == VK_SUCCESS)
570                                 ++instance->physicalDeviceCount;
571                         else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
572                                 break;
573                 }
574         }
575         drmFreeDevices(devices, max_devices);
576
577         return result;
578 }
579
580 VkResult radv_EnumeratePhysicalDevices(
581         VkInstance                                  _instance,
582         uint32_t*                                   pPhysicalDeviceCount,
583         VkPhysicalDevice*                           pPhysicalDevices)
584 {
585         RADV_FROM_HANDLE(radv_instance, instance, _instance);
586         VkResult result;
587
588         if (instance->physicalDeviceCount < 0) {
589                 result = radv_enumerate_devices(instance);
590                 if (result != VK_SUCCESS &&
591                     result != VK_ERROR_INCOMPATIBLE_DRIVER)
592                         return result;
593         }
594
595         if (!pPhysicalDevices) {
596                 *pPhysicalDeviceCount = instance->physicalDeviceCount;
597         } else {
598                 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
599                 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
600                         pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
601         }
602
603         return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
604                                                                      : VK_SUCCESS;
605 }
606
607 VkResult radv_EnumeratePhysicalDeviceGroups(
608     VkInstance                                  _instance,
609     uint32_t*                                   pPhysicalDeviceGroupCount,
610     VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
611 {
612         RADV_FROM_HANDLE(radv_instance, instance, _instance);
613         VkResult result;
614
615         if (instance->physicalDeviceCount < 0) {
616                 result = radv_enumerate_devices(instance);
617                 if (result != VK_SUCCESS &&
618                     result != VK_ERROR_INCOMPATIBLE_DRIVER)
619                         return result;
620         }
621
622         if (!pPhysicalDeviceGroupProperties) {
623                 *pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
624         } else {
625                 *pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
626                 for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
627                         pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
628                         pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
629                         pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
630                 }
631         }
632         return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
633                                                                           : VK_SUCCESS;
634 }
635
636 void radv_GetPhysicalDeviceFeatures(
637         VkPhysicalDevice                            physicalDevice,
638         VkPhysicalDeviceFeatures*                   pFeatures)
639 {
640         memset(pFeatures, 0, sizeof(*pFeatures));
641
642         *pFeatures = (VkPhysicalDeviceFeatures) {
643                 .robustBufferAccess                       = true,
644                 .fullDrawIndexUint32                      = true,
645                 .imageCubeArray                           = true,
646                 .independentBlend                         = true,
647                 .geometryShader                           = true,
648                 .tessellationShader                       = true,
649                 .sampleRateShading                        = true,
650                 .dualSrcBlend                             = true,
651                 .logicOp                                  = true,
652                 .multiDrawIndirect                        = true,
653                 .drawIndirectFirstInstance                = true,
654                 .depthClamp                               = true,
655                 .depthBiasClamp                           = true,
656                 .fillModeNonSolid                         = true,
657                 .depthBounds                              = true,
658                 .wideLines                                = true,
659                 .largePoints                              = true,
660                 .alphaToOne                               = true,
661                 .multiViewport                            = true,
662                 .samplerAnisotropy                        = true,
663                 .textureCompressionETC2                   = false,
664                 .textureCompressionASTC_LDR               = false,
665                 .textureCompressionBC                     = true,
666                 .occlusionQueryPrecise                    = true,
667                 .pipelineStatisticsQuery                  = true,
668                 .vertexPipelineStoresAndAtomics           = true,
669                 .fragmentStoresAndAtomics                 = true,
670                 .shaderTessellationAndGeometryPointSize   = true,
671                 .shaderImageGatherExtended                = true,
672                 .shaderStorageImageExtendedFormats        = true,
673                 .shaderStorageImageMultisample            = false,
674                 .shaderUniformBufferArrayDynamicIndexing  = true,
675                 .shaderSampledImageArrayDynamicIndexing   = true,
676                 .shaderStorageBufferArrayDynamicIndexing  = true,
677                 .shaderStorageImageArrayDynamicIndexing   = true,
678                 .shaderStorageImageReadWithoutFormat      = true,
679                 .shaderStorageImageWriteWithoutFormat     = true,
680                 .shaderClipDistance                       = true,
681                 .shaderCullDistance                       = true,
682                 .shaderFloat64                            = true,
683                 .shaderInt64                              = true,
684                 .shaderInt16                              = false,
685                 .sparseBinding                            = true,
686                 .variableMultisampleRate                  = true,
687                 .inheritedQueries                         = true,
688         };
689 }
690
691 void radv_GetPhysicalDeviceFeatures2(
692         VkPhysicalDevice                            physicalDevice,
693         VkPhysicalDeviceFeatures2KHR               *pFeatures)
694 {
695         vk_foreach_struct(ext, pFeatures->pNext) {
696                 switch (ext->sType) {
697                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
698                         VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
699                         features->variablePointersStorageBuffer = true;
700                         features->variablePointers = false;
701                         break;
702                 }
703                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: {
704                         VkPhysicalDeviceMultiviewFeaturesKHR *features = (VkPhysicalDeviceMultiviewFeaturesKHR*)ext;
705                         features->multiview = true;
706                         features->multiviewGeometryShader = true;
707                         features->multiviewTessellationShader = true;
708                         break;
709                 }
710                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: {
711                         VkPhysicalDeviceShaderDrawParameterFeatures *features =
712                             (VkPhysicalDeviceShaderDrawParameterFeatures*)ext;
713                         features->shaderDrawParameters = true;
714                         break;
715                 }
716                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
717                         VkPhysicalDeviceProtectedMemoryFeatures *features =
718                             (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
719                         features->protectedMemory = false;
720                         break;
721                 }
722                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
723                         VkPhysicalDevice16BitStorageFeatures *features =
724                             (VkPhysicalDevice16BitStorageFeatures*)ext;
725                         features->storageBuffer16BitAccess = false;
726                         features->uniformAndStorageBuffer16BitAccess = false;
727                         features->storagePushConstant16 = false;
728                         features->storageInputOutput16 = false;
729                         break;
730                 }
731                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
732                         VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
733                             (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
734                         features->samplerYcbcrConversion = false;
735                         break;
736                 }
737                 default:
738                         break;
739                 }
740         }
741         return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
742 }
743
744 void radv_GetPhysicalDeviceProperties(
745         VkPhysicalDevice                            physicalDevice,
746         VkPhysicalDeviceProperties*                 pProperties)
747 {
748         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
749         VkSampleCountFlags sample_counts = 0xf;
750
751         /* make sure that the entire descriptor set is addressable with a signed
752          * 32-bit int. So the sum of all limits scaled by descriptor size has to
753          * be at most 2 GiB. the combined image & samples object count as one of
754          * both. This limit is for the pipeline layout, not for the set layout, but
755          * there is no set limit, so we just set a pipeline limit. I don't think
756          * any app is going to hit this soon. */
757         size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
758                   (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
759                    32 /* storage buffer, 32 due to potential space wasted on alignment */ +
760                    32 /* sampler, largest when combined with image */ +
761                    64 /* sampled image */ +
762                    64 /* storage image */);
763
764         VkPhysicalDeviceLimits limits = {
765                 .maxImageDimension1D                      = (1 << 14),
766                 .maxImageDimension2D                      = (1 << 14),
767                 .maxImageDimension3D                      = (1 << 11),
768                 .maxImageDimensionCube                    = (1 << 14),
769                 .maxImageArrayLayers                      = (1 << 11),
770                 .maxTexelBufferElements                   = 128 * 1024 * 1024,
771                 .maxUniformBufferRange                    = UINT32_MAX,
772                 .maxStorageBufferRange                    = UINT32_MAX,
773                 .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
774                 .maxMemoryAllocationCount                 = UINT32_MAX,
775                 .maxSamplerAllocationCount                = 64 * 1024,
776                 .bufferImageGranularity                   = 64, /* A cache line */
777                 .sparseAddressSpaceSize                   = 0xffffffffu, /* buffer max size */
778                 .maxBoundDescriptorSets                   = MAX_SETS,
779                 .maxPerStageDescriptorSamplers            = max_descriptor_set_size,
780                 .maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
781                 .maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
782                 .maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
783                 .maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
784                 .maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
785                 .maxPerStageResources                     = max_descriptor_set_size,
786                 .maxDescriptorSetSamplers                 = max_descriptor_set_size,
787                 .maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
788                 .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_UNIFORM_BUFFERS,
789                 .maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
790                 .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_STORAGE_BUFFERS,
791                 .maxDescriptorSetSampledImages            = max_descriptor_set_size,
792                 .maxDescriptorSetStorageImages            = max_descriptor_set_size,
793                 .maxDescriptorSetInputAttachments         = max_descriptor_set_size,
794                 .maxVertexInputAttributes                 = 32,
795                 .maxVertexInputBindings                   = 32,
796                 .maxVertexInputAttributeOffset            = 2047,
797                 .maxVertexInputBindingStride              = 2048,
798                 .maxVertexOutputComponents                = 128,
799                 .maxTessellationGenerationLevel           = 64,
800                 .maxTessellationPatchSize                 = 32,
801                 .maxTessellationControlPerVertexInputComponents = 128,
802                 .maxTessellationControlPerVertexOutputComponents = 128,
803                 .maxTessellationControlPerPatchOutputComponents = 120,
804                 .maxTessellationControlTotalOutputComponents = 4096,
805                 .maxTessellationEvaluationInputComponents = 128,
806                 .maxTessellationEvaluationOutputComponents = 128,
807                 .maxGeometryShaderInvocations             = 127,
808                 .maxGeometryInputComponents               = 64,
809                 .maxGeometryOutputComponents              = 128,
810                 .maxGeometryOutputVertices                = 256,
811                 .maxGeometryTotalOutputComponents         = 1024,
812                 .maxFragmentInputComponents               = 128,
813                 .maxFragmentOutputAttachments             = 8,
814                 .maxFragmentDualSrcAttachments            = 1,
815                 .maxFragmentCombinedOutputResources       = 8,
816                 .maxComputeSharedMemorySize               = 32768,
817                 .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
818                 .maxComputeWorkGroupInvocations           = 2048,
819                 .maxComputeWorkGroupSize = {
820                         2048,
821                         2048,
822                         2048
823                 },
824                 .subPixelPrecisionBits                    = 4 /* FIXME */,
825                 .subTexelPrecisionBits                    = 4 /* FIXME */,
826                 .mipmapPrecisionBits                      = 4 /* FIXME */,
827                 .maxDrawIndexedIndexValue                 = UINT32_MAX,
828                 .maxDrawIndirectCount                     = UINT32_MAX,
829                 .maxSamplerLodBias                        = 16,
830                 .maxSamplerAnisotropy                     = 16,
831                 .maxViewports                             = MAX_VIEWPORTS,
832                 .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
833                 .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
834                 .viewportSubPixelBits                     = 13, /* We take a float? */
835                 .minMemoryMapAlignment                    = 4096, /* A page */
836                 .minTexelBufferOffsetAlignment            = 1,
837                 .minUniformBufferOffsetAlignment          = 4,
838                 .minStorageBufferOffsetAlignment          = 4,
839                 .minTexelOffset                           = -32,
840                 .maxTexelOffset                           = 31,
841                 .minTexelGatherOffset                     = -32,
842                 .maxTexelGatherOffset                     = 31,
843                 .minInterpolationOffset                   = -2,
844                 .maxInterpolationOffset                   = 2,
845                 .subPixelInterpolationOffsetBits          = 8,
846                 .maxFramebufferWidth                      = (1 << 14),
847                 .maxFramebufferHeight                     = (1 << 14),
848                 .maxFramebufferLayers                     = (1 << 10),
849                 .framebufferColorSampleCounts             = sample_counts,
850                 .framebufferDepthSampleCounts             = sample_counts,
851                 .framebufferStencilSampleCounts           = sample_counts,
852                 .framebufferNoAttachmentsSampleCounts     = sample_counts,
853                 .maxColorAttachments                      = MAX_RTS,
854                 .sampledImageColorSampleCounts            = sample_counts,
855                 .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
856                 .sampledImageDepthSampleCounts            = sample_counts,
857                 .sampledImageStencilSampleCounts          = sample_counts,
858                 .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
859                 .maxSampleMaskWords                       = 1,
860                 .timestampComputeAndGraphics              = true,
861                 .timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
862                 .maxClipDistances                         = 8,
863                 .maxCullDistances                         = 8,
864                 .maxCombinedClipAndCullDistances          = 8,
865                 .discreteQueuePriorities                  = 1,
866                 .pointSizeRange                           = { 0.125, 255.875 },
867                 .lineWidthRange                           = { 0.0, 7.9921875 },
868                 .pointSizeGranularity                     = (1.0 / 8.0),
869                 .lineWidthGranularity                     = (1.0 / 128.0),
870                 .strictLines                              = false, /* FINISHME */
871                 .standardSampleLocations                  = true,
872                 .optimalBufferCopyOffsetAlignment         = 128,
873                 .optimalBufferCopyRowPitchAlignment       = 128,
874                 .nonCoherentAtomSize                      = 64,
875         };
876
877         *pProperties = (VkPhysicalDeviceProperties) {
878                 .apiVersion = radv_physical_device_api_version(pdevice),
879                 .driverVersion = vk_get_driver_version(),
880                 .vendorID = ATI_VENDOR_ID,
881                 .deviceID = pdevice->rad_info.pci_id,
882                 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
883                 .limits = limits,
884                 .sparseProperties = {0},
885         };
886
887         strcpy(pProperties->deviceName, pdevice->name);
888         memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
889 }
890
891 void radv_GetPhysicalDeviceProperties2(
892         VkPhysicalDevice                            physicalDevice,
893         VkPhysicalDeviceProperties2KHR             *pProperties)
894 {
895         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
896         radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
897
898         vk_foreach_struct(ext, pProperties->pNext) {
899                 switch (ext->sType) {
900                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
901                         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
902                                 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
903                         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
904                         break;
905                 }
906                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
907                         VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
908                         memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
909                         memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
910                         properties->deviceLUIDValid = false;
911                         break;
912                 }
913                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR: {
914                         VkPhysicalDeviceMultiviewPropertiesKHR *properties = (VkPhysicalDeviceMultiviewPropertiesKHR*)ext;
915                         properties->maxMultiviewViewCount = MAX_VIEWS;
916                         properties->maxMultiviewInstanceIndex = INT_MAX;
917                         break;
918                 }
919                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
920                         VkPhysicalDevicePointClippingPropertiesKHR *properties =
921                             (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
922                         properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
923                         break;
924                 }
925                 case  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
926                         VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
927                             (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
928                         properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
929                         break;
930                 }
931                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
932                         VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
933                             (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
934                         properties->minImportedHostPointerAlignment = 4096;
935                         break;
936                 }
937                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
938                         VkPhysicalDeviceSubgroupProperties *properties =
939                             (VkPhysicalDeviceSubgroupProperties*)ext;
940                         properties->subgroupSize = 64;
941                         properties->supportedStages = VK_SHADER_STAGE_ALL;
942                         properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT;
943                         properties->quadOperationsInAllStages = false;
944                         break;
945                 }
946                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
947                         VkPhysicalDeviceMaintenance3Properties *properties =
948                             (VkPhysicalDeviceMaintenance3Properties*)ext;
949                         /* Make sure everything is addressable by a signed 32-bit int, and
950                          * our largest descriptors are 96 bytes. */
951                         properties->maxPerSetDescriptors = (1ull << 31) / 96;
952                         /* Our buffer size fields allow only this much */
953                         properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
954                         break;
955                 }
956                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
957                         VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
958                                 (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
959                         /* GFX6-8 only support single channel min/max filter. */
960                         properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
961                         properties->filterMinmaxSingleComponentFormats = true;
962                         break;
963                 }
964                 default:
965                         break;
966                 }
967         }
968 }
969
970 static void radv_get_physical_device_queue_family_properties(
971         struct radv_physical_device*                pdevice,
972         uint32_t*                                   pCount,
973         VkQueueFamilyProperties**                    pQueueFamilyProperties)
974 {
975         int num_queue_families = 1;
976         int idx;
977         if (pdevice->rad_info.num_compute_rings > 0 &&
978             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
979                 num_queue_families++;
980
981         if (pQueueFamilyProperties == NULL) {
982                 *pCount = num_queue_families;
983                 return;
984         }
985
986         if (!*pCount)
987                 return;
988
989         idx = 0;
990         if (*pCount >= 1) {
991                 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
992                         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
993                                       VK_QUEUE_COMPUTE_BIT |
994                                       VK_QUEUE_TRANSFER_BIT |
995                                       VK_QUEUE_SPARSE_BINDING_BIT,
996                         .queueCount = 1,
997                         .timestampValidBits = 64,
998                         .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
999                 };
1000                 idx++;
1001         }
1002
1003         if (pdevice->rad_info.num_compute_rings > 0 &&
1004             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
1005                 if (*pCount > idx) {
1006                         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1007                                 .queueFlags = VK_QUEUE_COMPUTE_BIT |
1008                                               VK_QUEUE_TRANSFER_BIT |
1009                                               VK_QUEUE_SPARSE_BINDING_BIT,
1010                                 .queueCount = pdevice->rad_info.num_compute_rings,
1011                                 .timestampValidBits = 64,
1012                                 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1013                         };
1014                         idx++;
1015                 }
1016         }
1017         *pCount = idx;
1018 }
1019
1020 void radv_GetPhysicalDeviceQueueFamilyProperties(
1021         VkPhysicalDevice                            physicalDevice,
1022         uint32_t*                                   pCount,
1023         VkQueueFamilyProperties*                    pQueueFamilyProperties)
1024 {
1025         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1026         if (!pQueueFamilyProperties) {
1027                 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1028                 return;
1029         }
1030         VkQueueFamilyProperties *properties[] = {
1031                 pQueueFamilyProperties + 0,
1032                 pQueueFamilyProperties + 1,
1033                 pQueueFamilyProperties + 2,
1034         };
1035         radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1036         assert(*pCount <= 3);
1037 }
1038
1039 void radv_GetPhysicalDeviceQueueFamilyProperties2(
1040         VkPhysicalDevice                            physicalDevice,
1041         uint32_t*                                   pCount,
1042         VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
1043 {
1044         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1045         if (!pQueueFamilyProperties) {
1046                 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1047                 return;
1048         }
1049         VkQueueFamilyProperties *properties[] = {
1050                 &pQueueFamilyProperties[0].queueFamilyProperties,
1051                 &pQueueFamilyProperties[1].queueFamilyProperties,
1052                 &pQueueFamilyProperties[2].queueFamilyProperties,
1053         };
1054         radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1055         assert(*pCount <= 3);
1056 }
1057
1058 void radv_GetPhysicalDeviceMemoryProperties(
1059         VkPhysicalDevice                            physicalDevice,
1060         VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
1061 {
1062         RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1063
1064         *pMemoryProperties = physical_device->memory_properties;
1065 }
1066
1067 void radv_GetPhysicalDeviceMemoryProperties2(
1068         VkPhysicalDevice                            physicalDevice,
1069         VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
1070 {
1071         return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1072                                                       &pMemoryProperties->memoryProperties);
1073 }
1074
1075 VkResult radv_GetMemoryHostPointerPropertiesEXT(
1076         VkDevice                                    _device,
1077         VkExternalMemoryHandleTypeFlagBitsKHR       handleType,
1078         const void                                 *pHostPointer,
1079         VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
1080 {
1081         RADV_FROM_HANDLE(radv_device, device, _device);
1082
1083         switch (handleType)
1084         {
1085         case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
1086                 const struct radv_physical_device *physical_device = device->physical_device;
1087                 uint32_t memoryTypeBits = 0;
1088                 for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
1089                         if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
1090                                 memoryTypeBits = (1 << i);
1091                                 break;
1092                         }
1093                 }
1094                 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
1095                 return VK_SUCCESS;
1096         }
1097         default:
1098                 return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
1099         }
1100 }
1101
1102 static enum radeon_ctx_priority
1103 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
1104 {
1105         /* Default to MEDIUM when a specific global priority isn't requested */
1106         if (!pObj)
1107                 return RADEON_CTX_PRIORITY_MEDIUM;
1108
1109         switch(pObj->globalPriority) {
1110         case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
1111                 return RADEON_CTX_PRIORITY_REALTIME;
1112         case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
1113                 return RADEON_CTX_PRIORITY_HIGH;
1114         case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
1115                 return RADEON_CTX_PRIORITY_MEDIUM;
1116         case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
1117                 return RADEON_CTX_PRIORITY_LOW;
1118         default:
1119                 unreachable("Illegal global priority value");
1120                 return RADEON_CTX_PRIORITY_INVALID;
1121         }
1122 }
1123
1124 static int
1125 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1126                 uint32_t queue_family_index, int idx,
1127                 VkDeviceQueueCreateFlags flags,
1128                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
1129 {
1130         queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1131         queue->device = device;
1132         queue->queue_family_index = queue_family_index;
1133         queue->queue_idx = idx;
1134         queue->priority = radv_get_queue_global_priority(global_priority);
1135         queue->flags = flags;
1136
1137         queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
1138         if (!queue->hw_ctx)
1139                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1140
1141         return VK_SUCCESS;
1142 }
1143
1144 static void
1145 radv_queue_finish(struct radv_queue *queue)
1146 {
1147         if (queue->hw_ctx)
1148                 queue->device->ws->ctx_destroy(queue->hw_ctx);
1149
1150         if (queue->initial_full_flush_preamble_cs)
1151                 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1152         if (queue->initial_preamble_cs)
1153                 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1154         if (queue->continue_preamble_cs)
1155                 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1156         if (queue->descriptor_bo)
1157                 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1158         if (queue->scratch_bo)
1159                 queue->device->ws->buffer_destroy(queue->scratch_bo);
1160         if (queue->esgs_ring_bo)
1161                 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1162         if (queue->gsvs_ring_bo)
1163                 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1164         if (queue->tess_rings_bo)
1165                 queue->device->ws->buffer_destroy(queue->tess_rings_bo);
1166         if (queue->compute_scratch_bo)
1167                 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1168 }
1169
1170 static void
1171 radv_device_init_gs_info(struct radv_device *device)
1172 {
1173         switch (device->physical_device->rad_info.family) {
1174         case CHIP_OLAND:
1175         case CHIP_HAINAN:
1176         case CHIP_KAVERI:
1177         case CHIP_KABINI:
1178         case CHIP_MULLINS:
1179         case CHIP_ICELAND:
1180         case CHIP_CARRIZO:
1181         case CHIP_STONEY:
1182                 device->gs_table_depth = 16;
1183                 return;
1184         case CHIP_TAHITI:
1185         case CHIP_PITCAIRN:
1186         case CHIP_VERDE:
1187         case CHIP_BONAIRE:
1188         case CHIP_HAWAII:
1189         case CHIP_TONGA:
1190         case CHIP_FIJI:
1191         case CHIP_POLARIS10:
1192         case CHIP_POLARIS11:
1193         case CHIP_POLARIS12:
1194         case CHIP_VEGA10:
1195         case CHIP_VEGA12:
1196         case CHIP_RAVEN:
1197                 device->gs_table_depth = 32;
1198                 return;
1199         default:
1200                 unreachable("unknown GPU");
1201         }
1202 }
1203
1204 static int radv_get_device_extension_index(const char *name)
1205 {
1206         for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
1207                 if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
1208                         return i;
1209         }
1210         return -1;
1211 }
1212
1213 VkResult radv_CreateDevice(
1214         VkPhysicalDevice                            physicalDevice,
1215         const VkDeviceCreateInfo*                   pCreateInfo,
1216         const VkAllocationCallbacks*                pAllocator,
1217         VkDevice*                                   pDevice)
1218 {
1219         RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1220         VkResult result;
1221         struct radv_device *device;
1222
1223         bool keep_shader_info = false;
1224
1225         /* Check enabled features */
1226         if (pCreateInfo->pEnabledFeatures) {
1227                 VkPhysicalDeviceFeatures supported_features;
1228                 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1229                 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1230                 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1231                 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1232                 for (uint32_t i = 0; i < num_features; i++) {
1233                         if (enabled_feature[i] && !supported_feature[i])
1234                                 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
1235                 }
1236         }
1237
1238         device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1239                             sizeof(*device), 8,
1240                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1241         if (!device)
1242                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1243
1244         device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1245         device->instance = physical_device->instance;
1246         device->physical_device = physical_device;
1247
1248         device->ws = physical_device->ws;
1249         if (pAllocator)
1250                 device->alloc = *pAllocator;
1251         else
1252                 device->alloc = physical_device->instance->alloc;
1253
1254         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1255                 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1256                 int index = radv_get_device_extension_index(ext_name);
1257                 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1258                         vk_free(&device->alloc, device);
1259                         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1260                 }
1261
1262                 device->enabled_extensions.extensions[index] = true;
1263         }
1264
1265         keep_shader_info = device->enabled_extensions.AMD_shader_info;
1266
1267         mtx_init(&device->shader_slab_mutex, mtx_plain);
1268         list_inithead(&device->shader_slabs);
1269
1270         for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1271                 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1272                 uint32_t qfi = queue_create->queueFamilyIndex;
1273                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1274                         vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1275
1276                 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1277
1278                 device->queues[qfi] = vk_alloc(&device->alloc,
1279                                                queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1280                 if (!device->queues[qfi]) {
1281                         result = VK_ERROR_OUT_OF_HOST_MEMORY;
1282                         goto fail;
1283                 }
1284
1285                 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1286
1287                 device->queue_count[qfi] = queue_create->queueCount;
1288
1289                 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1290                         result = radv_queue_init(device, &device->queues[qfi][q],
1291                                                  qfi, q, queue_create->flags,
1292                                                  global_priority);
1293                         if (result != VK_SUCCESS)
1294                                 goto fail;
1295                 }
1296         }
1297
1298         device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1299                               (device->instance->perftest_flags & RADV_PERFTEST_BINNING);
1300
1301         /* Disabled and not implemented for now. */
1302         device->dfsm_allowed = device->pbb_allowed && false;
1303
1304 #ifdef ANDROID
1305         device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1306 #endif
1307
1308         device->llvm_supports_spill = true;
1309
1310         /* The maximum number of scratch waves. Scratch space isn't divided
1311          * evenly between CUs. The number is only a function of the number of CUs.
1312          * We can decrease the constant to decrease the scratch buffer size.
1313          *
1314          * sctx->scratch_waves must be >= the maximum possible size of
1315          * 1 threadgroup, so that the hw doesn't hang from being unable
1316          * to start any.
1317          *
1318          * The recommended value is 4 per CU at most. Higher numbers don't
1319          * bring much benefit, but they still occupy chip resources (think
1320          * async compute). I've seen ~2% performance difference between 4 and 32.
1321          */
1322         uint32_t max_threads_per_block = 2048;
1323         device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1324                                      max_threads_per_block / 64);
1325
1326         device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
1327
1328         if (device->physical_device->rad_info.chip_class >= CIK) {
1329                 /* If the KMD allows it (there is a KMD hw register for it),
1330                  * allow launching waves out-of-order.
1331                  */
1332                 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
1333         }
1334
1335         radv_device_init_gs_info(device);
1336
1337         device->tess_offchip_block_dw_size =
1338                 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1339         device->has_distributed_tess =
1340                 device->physical_device->rad_info.chip_class >= VI &&
1341                 device->physical_device->rad_info.max_se >= 2;
1342
1343         if (getenv("RADV_TRACE_FILE")) {
1344                 const char *filename = getenv("RADV_TRACE_FILE");
1345
1346                 keep_shader_info = true;
1347
1348                 if (!radv_init_trace(device))
1349                         goto fail;
1350
1351                 fprintf(stderr, "Trace file will be dumped to %s\n", filename);
1352                 radv_dump_enabled_options(device, stderr);
1353         }
1354
1355         device->keep_shader_info = keep_shader_info;
1356
1357         result = radv_device_init_meta(device);
1358         if (result != VK_SUCCESS)
1359                 goto fail;
1360
1361         radv_device_init_msaa(device);
1362
1363         for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1364                 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1365                 switch (family) {
1366                 case RADV_QUEUE_GENERAL:
1367                         radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1368                         radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1369                         radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1370                         break;
1371                 case RADV_QUEUE_COMPUTE:
1372                         radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1373                         radeon_emit(device->empty_cs[family], 0);
1374                         break;
1375                 }
1376                 device->ws->cs_finalize(device->empty_cs[family]);
1377         }
1378
1379         if (device->physical_device->rad_info.chip_class >= CIK)
1380                 cik_create_gfx_config(device);
1381
1382         VkPipelineCacheCreateInfo ci;
1383         ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1384         ci.pNext = NULL;
1385         ci.flags = 0;
1386         ci.pInitialData = NULL;
1387         ci.initialDataSize = 0;
1388         VkPipelineCache pc;
1389         result = radv_CreatePipelineCache(radv_device_to_handle(device),
1390                                           &ci, NULL, &pc);
1391         if (result != VK_SUCCESS)
1392                 goto fail_meta;
1393
1394         device->mem_cache = radv_pipeline_cache_from_handle(pc);
1395
1396         *pDevice = radv_device_to_handle(device);
1397         return VK_SUCCESS;
1398
1399 fail_meta:
1400         radv_device_finish_meta(device);
1401 fail:
1402         if (device->trace_bo)
1403                 device->ws->buffer_destroy(device->trace_bo);
1404
1405         if (device->gfx_init)
1406                 device->ws->buffer_destroy(device->gfx_init);
1407
1408         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1409                 for (unsigned q = 0; q < device->queue_count[i]; q++)
1410                         radv_queue_finish(&device->queues[i][q]);
1411                 if (device->queue_count[i])
1412                         vk_free(&device->alloc, device->queues[i]);
1413         }
1414
1415         vk_free(&device->alloc, device);
1416         return result;
1417 }
1418
1419 void radv_DestroyDevice(
1420         VkDevice                                    _device,
1421         const VkAllocationCallbacks*                pAllocator)
1422 {
1423         RADV_FROM_HANDLE(radv_device, device, _device);
1424
1425         if (!device)
1426                 return;
1427
1428         if (device->trace_bo)
1429                 device->ws->buffer_destroy(device->trace_bo);
1430
1431         if (device->gfx_init)
1432                 device->ws->buffer_destroy(device->gfx_init);
1433
1434         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1435                 for (unsigned q = 0; q < device->queue_count[i]; q++)
1436                         radv_queue_finish(&device->queues[i][q]);
1437                 if (device->queue_count[i])
1438                         vk_free(&device->alloc, device->queues[i]);
1439                 if (device->empty_cs[i])
1440                         device->ws->cs_destroy(device->empty_cs[i]);
1441         }
1442         radv_device_finish_meta(device);
1443
1444         VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1445         radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1446
1447         radv_destroy_shader_slabs(device);
1448
1449         vk_free(&device->alloc, device);
1450 }
1451
1452 VkResult radv_EnumerateInstanceLayerProperties(
1453         uint32_t*                                   pPropertyCount,
1454         VkLayerProperties*                          pProperties)
1455 {
1456         if (pProperties == NULL) {
1457                 *pPropertyCount = 0;
1458                 return VK_SUCCESS;
1459         }
1460
1461         /* None supported at this time */
1462         return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1463 }
1464
1465 VkResult radv_EnumerateDeviceLayerProperties(
1466         VkPhysicalDevice                            physicalDevice,
1467         uint32_t*                                   pPropertyCount,
1468         VkLayerProperties*                          pProperties)
1469 {
1470         if (pProperties == NULL) {
1471                 *pPropertyCount = 0;
1472                 return VK_SUCCESS;
1473         }
1474
1475         /* None supported at this time */
1476         return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1477 }
1478
1479 void radv_GetDeviceQueue2(
1480         VkDevice                                    _device,
1481         const VkDeviceQueueInfo2*                   pQueueInfo,
1482         VkQueue*                                    pQueue)
1483 {
1484         RADV_FROM_HANDLE(radv_device, device, _device);
1485         struct radv_queue *queue;
1486
1487         queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
1488         if (pQueueInfo->flags != queue->flags) {
1489                 /* From the Vulkan 1.1.70 spec:
1490                  *
1491                  * "The queue returned by vkGetDeviceQueue2 must have the same
1492                  * flags value from this structure as that used at device
1493                  * creation time in a VkDeviceQueueCreateInfo instance. If no
1494                  * matching flags were specified at device creation time then
1495                  * pQueue will return VK_NULL_HANDLE."
1496                  */
1497                 *pQueue = VK_NULL_HANDLE;
1498                 return;
1499         }
1500
1501         *pQueue = radv_queue_to_handle(queue);
1502 }
1503
1504 void radv_GetDeviceQueue(
1505         VkDevice                                    _device,
1506         uint32_t                                    queueFamilyIndex,
1507         uint32_t                                    queueIndex,
1508         VkQueue*                                    pQueue)
1509 {
1510         const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
1511                 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
1512                 .queueFamilyIndex = queueFamilyIndex,
1513                 .queueIndex = queueIndex
1514         };
1515
1516         radv_GetDeviceQueue2(_device, &info, pQueue);
1517 }
1518
1519 static void
1520 fill_geom_tess_rings(struct radv_queue *queue,
1521                      uint32_t *map,
1522                      bool add_sample_positions,
1523                      uint32_t esgs_ring_size,
1524                      struct radeon_winsys_bo *esgs_ring_bo,
1525                      uint32_t gsvs_ring_size,
1526                      struct radeon_winsys_bo *gsvs_ring_bo,
1527                      uint32_t tess_factor_ring_size,
1528                      uint32_t tess_offchip_ring_offset,
1529                      uint32_t tess_offchip_ring_size,
1530                      struct radeon_winsys_bo *tess_rings_bo)
1531 {
1532         uint64_t esgs_va = 0, gsvs_va = 0;
1533         uint64_t tess_va = 0, tess_offchip_va = 0;
1534         uint32_t *desc = &map[4];
1535
1536         if (esgs_ring_bo)
1537                 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1538         if (gsvs_ring_bo)
1539                 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1540         if (tess_rings_bo) {
1541                 tess_va = radv_buffer_get_va(tess_rings_bo);
1542                 tess_offchip_va = tess_va + tess_offchip_ring_offset;
1543         }
1544
1545         /* stride 0, num records - size, add tid, swizzle, elsize4,
1546            index stride 64 */
1547         desc[0] = esgs_va;
1548         desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1549                 S_008F04_STRIDE(0) |
1550                 S_008F04_SWIZZLE_ENABLE(true);
1551         desc[2] = esgs_ring_size;
1552         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1553                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1554                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1555                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1556                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1557                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1558                 S_008F0C_ELEMENT_SIZE(1) |
1559                 S_008F0C_INDEX_STRIDE(3) |
1560                 S_008F0C_ADD_TID_ENABLE(true);
1561
1562         desc += 4;
1563         /* GS entry for ES->GS ring */
1564         /* stride 0, num records - size, elsize0,
1565            index stride 0 */
1566         desc[0] = esgs_va;
1567         desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1568                 S_008F04_STRIDE(0) |
1569                 S_008F04_SWIZZLE_ENABLE(false);
1570         desc[2] = esgs_ring_size;
1571         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1572                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1573                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1574                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1575                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1576                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1577                 S_008F0C_ELEMENT_SIZE(0) |
1578                 S_008F0C_INDEX_STRIDE(0) |
1579                 S_008F0C_ADD_TID_ENABLE(false);
1580
1581         desc += 4;
1582         /* VS entry for GS->VS ring */
1583         /* stride 0, num records - size, elsize0,
1584            index stride 0 */
1585         desc[0] = gsvs_va;
1586         desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1587                 S_008F04_STRIDE(0) |
1588                 S_008F04_SWIZZLE_ENABLE(false);
1589         desc[2] = gsvs_ring_size;
1590         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1591                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1592                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1593                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1594                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1595                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1596                 S_008F0C_ELEMENT_SIZE(0) |
1597                 S_008F0C_INDEX_STRIDE(0) |
1598                 S_008F0C_ADD_TID_ENABLE(false);
1599         desc += 4;
1600
1601         /* stride gsvs_itemsize, num records 64
1602            elsize 4, index stride 16 */
1603         /* shader will patch stride and desc[2] */
1604         desc[0] = gsvs_va;
1605         desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1606                 S_008F04_STRIDE(0) |
1607                 S_008F04_SWIZZLE_ENABLE(true);
1608         desc[2] = 0;
1609         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1610                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1611                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1612                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1613                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1614                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1615                 S_008F0C_ELEMENT_SIZE(1) |
1616                 S_008F0C_INDEX_STRIDE(1) |
1617                 S_008F0C_ADD_TID_ENABLE(true);
1618         desc += 4;
1619
1620         desc[0] = tess_va;
1621         desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
1622                 S_008F04_STRIDE(0) |
1623                 S_008F04_SWIZZLE_ENABLE(false);
1624         desc[2] = tess_factor_ring_size;
1625         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1626                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1627                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1628                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1629                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1630                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1631                 S_008F0C_ELEMENT_SIZE(0) |
1632                 S_008F0C_INDEX_STRIDE(0) |
1633                 S_008F0C_ADD_TID_ENABLE(false);
1634         desc += 4;
1635
1636         desc[0] = tess_offchip_va;
1637         desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1638                 S_008F04_STRIDE(0) |
1639                 S_008F04_SWIZZLE_ENABLE(false);
1640         desc[2] = tess_offchip_ring_size;
1641         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1642                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1643                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1644                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1645                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1646                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1647                 S_008F0C_ELEMENT_SIZE(0) |
1648                 S_008F0C_INDEX_STRIDE(0) |
1649                 S_008F0C_ADD_TID_ENABLE(false);
1650         desc += 4;
1651
1652         /* add sample positions after all rings */
1653         memcpy(desc, queue->device->sample_locations_1x, 8);
1654         desc += 2;
1655         memcpy(desc, queue->device->sample_locations_2x, 16);
1656         desc += 4;
1657         memcpy(desc, queue->device->sample_locations_4x, 32);
1658         desc += 8;
1659         memcpy(desc, queue->device->sample_locations_8x, 64);
1660         desc += 16;
1661         memcpy(desc, queue->device->sample_locations_16x, 128);
1662 }
1663
1664 static unsigned
1665 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1666 {
1667         bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1668                 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1669                 device->physical_device->rad_info.family != CHIP_STONEY;
1670         unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1671         unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1672                 device->physical_device->rad_info.max_se;
1673         unsigned offchip_granularity;
1674         unsigned hs_offchip_param;
1675         switch (device->tess_offchip_block_dw_size) {
1676         default:
1677                 assert(0);
1678                 /* fall through */
1679         case 8192:
1680                 offchip_granularity = V_03093C_X_8K_DWORDS;
1681                 break;
1682         case 4096:
1683                 offchip_granularity = V_03093C_X_4K_DWORDS;
1684                 break;
1685         }
1686
1687         switch (device->physical_device->rad_info.chip_class) {
1688         case SI:
1689                 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1690                 break;
1691         case CIK:
1692         case VI:
1693         case GFX9:
1694         default:
1695                 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1696                 break;
1697         }
1698
1699         *max_offchip_buffers_p = max_offchip_buffers;
1700         if (device->physical_device->rad_info.chip_class >= CIK) {
1701                 if (device->physical_device->rad_info.chip_class >= VI)
1702                         --max_offchip_buffers;
1703                 hs_offchip_param =
1704                         S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1705                         S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1706         } else {
1707                 hs_offchip_param =
1708                         S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1709         }
1710         return hs_offchip_param;
1711 }
1712
1713 static VkResult
1714 radv_get_preamble_cs(struct radv_queue *queue,
1715                      uint32_t scratch_size,
1716                      uint32_t compute_scratch_size,
1717                      uint32_t esgs_ring_size,
1718                      uint32_t gsvs_ring_size,
1719                      bool needs_tess_rings,
1720                      bool needs_sample_positions,
1721                      struct radeon_winsys_cs **initial_full_flush_preamble_cs,
1722                      struct radeon_winsys_cs **initial_preamble_cs,
1723                      struct radeon_winsys_cs **continue_preamble_cs)
1724 {
1725         struct radeon_winsys_bo *scratch_bo = NULL;
1726         struct radeon_winsys_bo *descriptor_bo = NULL;
1727         struct radeon_winsys_bo *compute_scratch_bo = NULL;
1728         struct radeon_winsys_bo *esgs_ring_bo = NULL;
1729         struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1730         struct radeon_winsys_bo *tess_rings_bo = NULL;
1731         struct radeon_winsys_cs *dest_cs[3] = {0};
1732         bool add_tess_rings = false, add_sample_positions = false;
1733         unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1734         unsigned max_offchip_buffers;
1735         unsigned hs_offchip_param = 0;
1736         unsigned tess_offchip_ring_offset;
1737         uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
1738         if (!queue->has_tess_rings) {
1739                 if (needs_tess_rings)
1740                         add_tess_rings = true;
1741         }
1742         if (!queue->has_sample_positions) {
1743                 if (needs_sample_positions)
1744                         add_sample_positions = true;
1745         }
1746         tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1747         hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1748                                                      &max_offchip_buffers);
1749         tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
1750         tess_offchip_ring_size = max_offchip_buffers *
1751                 queue->device->tess_offchip_block_dw_size * 4;
1752
1753         if (scratch_size <= queue->scratch_size &&
1754             compute_scratch_size <= queue->compute_scratch_size &&
1755             esgs_ring_size <= queue->esgs_ring_size &&
1756             gsvs_ring_size <= queue->gsvs_ring_size &&
1757             !add_tess_rings && !add_sample_positions &&
1758             queue->initial_preamble_cs) {
1759                 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1760                 *initial_preamble_cs = queue->initial_preamble_cs;
1761                 *continue_preamble_cs = queue->continue_preamble_cs;
1762                 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1763                         *continue_preamble_cs = NULL;
1764                 return VK_SUCCESS;
1765         }
1766
1767         if (scratch_size > queue->scratch_size) {
1768                 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1769                                                               scratch_size,
1770                                                               4096,
1771                                                               RADEON_DOMAIN_VRAM,
1772                                                               ring_bo_flags);
1773                 if (!scratch_bo)
1774                         goto fail;
1775         } else
1776                 scratch_bo = queue->scratch_bo;
1777
1778         if (compute_scratch_size > queue->compute_scratch_size) {
1779                 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1780                                                                       compute_scratch_size,
1781                                                                       4096,
1782                                                                       RADEON_DOMAIN_VRAM,
1783                                                                       ring_bo_flags);
1784                 if (!compute_scratch_bo)
1785                         goto fail;
1786
1787         } else
1788                 compute_scratch_bo = queue->compute_scratch_bo;
1789
1790         if (esgs_ring_size > queue->esgs_ring_size) {
1791                 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1792                                                                 esgs_ring_size,
1793                                                                 4096,
1794                                                                 RADEON_DOMAIN_VRAM,
1795                                                                 ring_bo_flags);
1796                 if (!esgs_ring_bo)
1797                         goto fail;
1798         } else {
1799                 esgs_ring_bo = queue->esgs_ring_bo;
1800                 esgs_ring_size = queue->esgs_ring_size;
1801         }
1802
1803         if (gsvs_ring_size > queue->gsvs_ring_size) {
1804                 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1805                                                                 gsvs_ring_size,
1806                                                                 4096,
1807                                                                 RADEON_DOMAIN_VRAM,
1808                                                                 ring_bo_flags);
1809                 if (!gsvs_ring_bo)
1810                         goto fail;
1811         } else {
1812                 gsvs_ring_bo = queue->gsvs_ring_bo;
1813                 gsvs_ring_size = queue->gsvs_ring_size;
1814         }
1815
1816         if (add_tess_rings) {
1817                 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
1818                                                                  tess_offchip_ring_offset + tess_offchip_ring_size,
1819                                                                  256,
1820                                                                  RADEON_DOMAIN_VRAM,
1821                                                                  ring_bo_flags);
1822                 if (!tess_rings_bo)
1823                         goto fail;
1824         } else {
1825                 tess_rings_bo = queue->tess_rings_bo;
1826         }
1827
1828         if (scratch_bo != queue->scratch_bo ||
1829             esgs_ring_bo != queue->esgs_ring_bo ||
1830             gsvs_ring_bo != queue->gsvs_ring_bo ||
1831             tess_rings_bo != queue->tess_rings_bo ||
1832             add_sample_positions) {
1833                 uint32_t size = 0;
1834                 if (gsvs_ring_bo || esgs_ring_bo ||
1835                     tess_rings_bo || add_sample_positions) {
1836                         size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1837                         if (add_sample_positions)
1838                                 size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
1839                 }
1840                 else if (scratch_bo)
1841                         size = 8; /* 2 dword */
1842
1843                 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1844                                                                  size,
1845                                                                  4096,
1846                                                                  RADEON_DOMAIN_VRAM,
1847                                                                  RADEON_FLAG_CPU_ACCESS |
1848                                                                  RADEON_FLAG_NO_INTERPROCESS_SHARING |
1849                                                                  RADEON_FLAG_READ_ONLY);
1850                 if (!descriptor_bo)
1851                         goto fail;
1852         } else
1853                 descriptor_bo = queue->descriptor_bo;
1854
1855         for(int i = 0; i < 3; ++i) {
1856                 struct radeon_winsys_cs *cs = NULL;
1857                 cs = queue->device->ws->cs_create(queue->device->ws,
1858                                                   queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1859                 if (!cs)
1860                         goto fail;
1861
1862                 dest_cs[i] = cs;
1863
1864                 if (scratch_bo)
1865                         radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
1866
1867                 if (esgs_ring_bo)
1868                         radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
1869
1870                 if (gsvs_ring_bo)
1871                         radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
1872
1873                 if (tess_rings_bo)
1874                         radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo, 8);
1875
1876                 if (descriptor_bo)
1877                         radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
1878
1879                 if (descriptor_bo != queue->descriptor_bo) {
1880                         uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1881
1882                         if (scratch_bo) {
1883                                 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1884                                 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1885                                                  S_008F04_SWIZZLE_ENABLE(1);
1886                                 map[0] = scratch_va;
1887                                 map[1] = rsrc1;
1888                         }
1889
1890                         if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo ||
1891                             add_sample_positions)
1892                                 fill_geom_tess_rings(queue, map, add_sample_positions,
1893                                                      esgs_ring_size, esgs_ring_bo,
1894                                                      gsvs_ring_size, gsvs_ring_bo,
1895                                                      tess_factor_ring_size,
1896                                                      tess_offchip_ring_offset,
1897                                                      tess_offchip_ring_size,
1898                                                      tess_rings_bo);
1899
1900                         queue->device->ws->buffer_unmap(descriptor_bo);
1901                 }
1902
1903                 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo)  {
1904                         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1905                         radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1906                         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1907                         radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1908                 }
1909
1910                 if (esgs_ring_bo || gsvs_ring_bo) {
1911                         if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1912                                 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1913                                 radeon_emit(cs, esgs_ring_size >> 8);
1914                                 radeon_emit(cs, gsvs_ring_size >> 8);
1915                         } else {
1916                                 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1917                                 radeon_emit(cs, esgs_ring_size >> 8);
1918                                 radeon_emit(cs, gsvs_ring_size >> 8);
1919                         }
1920                 }
1921
1922                 if (tess_rings_bo) {
1923                         uint64_t tf_va = radv_buffer_get_va(tess_rings_bo);
1924                         if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1925                                 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1926                                                        S_030938_SIZE(tess_factor_ring_size / 4));
1927                                 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1928                                                        tf_va >> 8);
1929                                 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1930                                         radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1931                                                                S_030944_BASE_HI(tf_va >> 40));
1932                                 }
1933                                 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1934                         } else {
1935                                 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1936                                                       S_008988_SIZE(tess_factor_ring_size / 4));
1937                                 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1938                                                       tf_va >> 8);
1939                                 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1940                                                       hs_offchip_param);
1941                         }
1942                 }
1943
1944                 if (descriptor_bo) {
1945                         uint64_t va = radv_buffer_get_va(descriptor_bo);
1946                         if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1947                                 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1948                                                 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1949                                                 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
1950                                                 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
1951
1952                                 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1953                                         radeon_set_sh_reg_seq(cs, regs[i], 2);
1954                                         radeon_emit(cs, va);
1955                                         radeon_emit(cs, va >> 32);
1956                                 }
1957                         } else {
1958                                 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1959                                                 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1960                                                 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1961                                                 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1962                                                 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1963                                                 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1964
1965                                 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1966                                         radeon_set_sh_reg_seq(cs, regs[i], 2);
1967                                         radeon_emit(cs, va);
1968                                         radeon_emit(cs, va >> 32);
1969                                 }
1970                         }
1971                 }
1972
1973                 if (compute_scratch_bo) {
1974                         uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1975                         uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1976                                          S_008F04_SWIZZLE_ENABLE(1);
1977
1978                         radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
1979
1980                         radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1981                         radeon_emit(cs, scratch_va);
1982                         radeon_emit(cs, rsrc1);
1983                 }
1984
1985                 if (i == 0) {
1986                         si_cs_emit_cache_flush(cs,
1987                                                queue->device->physical_device->rad_info.chip_class,
1988                                                NULL, 0,
1989                                                queue->queue_family_index == RING_COMPUTE &&
1990                                                  queue->device->physical_device->rad_info.chip_class >= CIK,
1991                                                (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1992                                                RADV_CMD_FLAG_INV_ICACHE |
1993                                                RADV_CMD_FLAG_INV_SMEM_L1 |
1994                                                RADV_CMD_FLAG_INV_VMEM_L1 |
1995                                                RADV_CMD_FLAG_INV_GLOBAL_L2);
1996                 } else if (i == 1) {
1997                         si_cs_emit_cache_flush(cs,
1998                                                queue->device->physical_device->rad_info.chip_class,
1999                                                NULL, 0,
2000                                                queue->queue_family_index == RING_COMPUTE &&
2001                                                  queue->device->physical_device->rad_info.chip_class >= CIK,
2002                                                RADV_CMD_FLAG_INV_ICACHE |
2003                                                RADV_CMD_FLAG_INV_SMEM_L1 |
2004                                                RADV_CMD_FLAG_INV_VMEM_L1 |
2005                                                RADV_CMD_FLAG_INV_GLOBAL_L2);
2006                 }
2007
2008                 if (!queue->device->ws->cs_finalize(cs))
2009                         goto fail;
2010         }
2011
2012         if (queue->initial_full_flush_preamble_cs)
2013                         queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2014
2015         if (queue->initial_preamble_cs)
2016                         queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2017
2018         if (queue->continue_preamble_cs)
2019                         queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2020
2021         queue->initial_full_flush_preamble_cs = dest_cs[0];
2022         queue->initial_preamble_cs = dest_cs[1];
2023         queue->continue_preamble_cs = dest_cs[2];
2024
2025         if (scratch_bo != queue->scratch_bo) {
2026                 if (queue->scratch_bo)
2027                         queue->device->ws->buffer_destroy(queue->scratch_bo);
2028                 queue->scratch_bo = scratch_bo;
2029                 queue->scratch_size = scratch_size;
2030         }
2031
2032         if (compute_scratch_bo != queue->compute_scratch_bo) {
2033                 if (queue->compute_scratch_bo)
2034                         queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2035                 queue->compute_scratch_bo = compute_scratch_bo;
2036                 queue->compute_scratch_size = compute_scratch_size;
2037         }
2038
2039         if (esgs_ring_bo != queue->esgs_ring_bo) {
2040                 if (queue->esgs_ring_bo)
2041                         queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2042                 queue->esgs_ring_bo = esgs_ring_bo;
2043                 queue->esgs_ring_size = esgs_ring_size;
2044         }
2045
2046         if (gsvs_ring_bo != queue->gsvs_ring_bo) {
2047                 if (queue->gsvs_ring_bo)
2048                         queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2049                 queue->gsvs_ring_bo = gsvs_ring_bo;
2050                 queue->gsvs_ring_size = gsvs_ring_size;
2051         }
2052
2053         if (tess_rings_bo != queue->tess_rings_bo) {
2054                 queue->tess_rings_bo = tess_rings_bo;
2055                 queue->has_tess_rings = true;
2056         }
2057
2058         if (descriptor_bo != queue->descriptor_bo) {
2059                 if (queue->descriptor_bo)
2060                         queue->device->ws->buffer_destroy(queue->descriptor_bo);
2061
2062                 queue->descriptor_bo = descriptor_bo;
2063         }
2064
2065         if (add_sample_positions)
2066                 queue->has_sample_positions = true;
2067
2068         *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2069         *initial_preamble_cs = queue->initial_preamble_cs;
2070         *continue_preamble_cs = queue->continue_preamble_cs;
2071         if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2072                         *continue_preamble_cs = NULL;
2073         return VK_SUCCESS;
2074 fail:
2075         for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2076                 if (dest_cs[i])
2077                         queue->device->ws->cs_destroy(dest_cs[i]);
2078         if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2079                 queue->device->ws->buffer_destroy(descriptor_bo);
2080         if (scratch_bo && scratch_bo != queue->scratch_bo)
2081                 queue->device->ws->buffer_destroy(scratch_bo);
2082         if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2083                 queue->device->ws->buffer_destroy(compute_scratch_bo);
2084         if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2085                 queue->device->ws->buffer_destroy(esgs_ring_bo);
2086         if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2087                 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2088         if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2089                 queue->device->ws->buffer_destroy(tess_rings_bo);
2090         return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2091 }
2092
2093 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
2094                                       int num_sems,
2095                                       const VkSemaphore *sems,
2096                                       VkFence _fence,
2097                                       bool reset_temp)
2098 {
2099         int syncobj_idx = 0, sem_idx = 0;
2100
2101         if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2102                 return VK_SUCCESS;
2103
2104         for (uint32_t i = 0; i < num_sems; i++) {
2105                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2106
2107                 if (sem->temp_syncobj || sem->syncobj)
2108                         counts->syncobj_count++;
2109                 else
2110                         counts->sem_count++;
2111         }
2112
2113         if (_fence != VK_NULL_HANDLE) {
2114                 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2115                 if (fence->temp_syncobj || fence->syncobj)
2116                         counts->syncobj_count++;
2117         }
2118
2119         if (counts->syncobj_count) {
2120                 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2121                 if (!counts->syncobj)
2122                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2123         }
2124
2125         if (counts->sem_count) {
2126                 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2127                 if (!counts->sem) {
2128                         free(counts->syncobj);
2129                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2130                 }
2131         }
2132
2133         for (uint32_t i = 0; i < num_sems; i++) {
2134                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2135
2136                 if (sem->temp_syncobj) {
2137                         counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2138                 }
2139                 else if (sem->syncobj)
2140                         counts->syncobj[syncobj_idx++] = sem->syncobj;
2141                 else {
2142                         assert(sem->sem);
2143                         counts->sem[sem_idx++] = sem->sem;
2144                 }
2145         }
2146
2147         if (_fence != VK_NULL_HANDLE) {
2148                 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2149                 if (fence->temp_syncobj)
2150                         counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2151                 else if (fence->syncobj)
2152                         counts->syncobj[syncobj_idx++] = fence->syncobj;
2153         }
2154
2155         return VK_SUCCESS;
2156 }
2157
2158 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2159 {
2160         free(sem_info->wait.syncobj);
2161         free(sem_info->wait.sem);
2162         free(sem_info->signal.syncobj);
2163         free(sem_info->signal.sem);
2164 }
2165
2166
2167 static void radv_free_temp_syncobjs(struct radv_device *device,
2168                                     int num_sems,
2169                                     const VkSemaphore *sems)
2170 {
2171         for (uint32_t i = 0; i < num_sems; i++) {
2172                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2173
2174                 if (sem->temp_syncobj) {
2175                         device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2176                         sem->temp_syncobj = 0;
2177                 }
2178         }
2179 }
2180
2181 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
2182                              int num_wait_sems,
2183                              const VkSemaphore *wait_sems,
2184                              int num_signal_sems,
2185                              const VkSemaphore *signal_sems,
2186                              VkFence fence)
2187 {
2188         VkResult ret;
2189         memset(sem_info, 0, sizeof(*sem_info));
2190
2191         ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
2192         if (ret)
2193                 return ret;
2194         ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, fence, false);
2195         if (ret)
2196                 radv_free_sem_info(sem_info);
2197
2198         /* caller can override these */
2199         sem_info->cs_emit_wait = true;
2200         sem_info->cs_emit_signal = true;
2201         return ret;
2202 }
2203
2204 /* Signals fence as soon as all the work currently put on queue is done. */
2205 static VkResult radv_signal_fence(struct radv_queue *queue,
2206                               struct radv_fence *fence)
2207 {
2208         int ret;
2209         VkResult result;
2210         struct radv_winsys_sem_info sem_info;
2211
2212         result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
2213                                      radv_fence_to_handle(fence));
2214         if (result != VK_SUCCESS)
2215                 return result;
2216
2217         ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2218                                            &queue->device->empty_cs[queue->queue_family_index],
2219                                            1, NULL, NULL, &sem_info,
2220                                            false, fence->fence);
2221         radv_free_sem_info(&sem_info);
2222
2223         /* TODO: find a better error */
2224         if (ret)
2225                 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2226
2227         return VK_SUCCESS;
2228 }
2229
2230 VkResult radv_QueueSubmit(
2231         VkQueue                                     _queue,
2232         uint32_t                                    submitCount,
2233         const VkSubmitInfo*                         pSubmits,
2234         VkFence                                     _fence)
2235 {
2236         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2237         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2238         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2239         struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2240         int ret;
2241         uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
2242         uint32_t scratch_size = 0;
2243         uint32_t compute_scratch_size = 0;
2244         uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2245         struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2246         VkResult result;
2247         bool fence_emitted = false;
2248         bool tess_rings_needed = false;
2249         bool sample_positions_needed = false;
2250
2251         /* Do this first so failing to allocate scratch buffers can't result in
2252          * partially executed submissions. */
2253         for (uint32_t i = 0; i < submitCount; i++) {
2254                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2255                         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2256                                          pSubmits[i].pCommandBuffers[j]);
2257
2258                         scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2259                         compute_scratch_size = MAX2(compute_scratch_size,
2260                                                     cmd_buffer->compute_scratch_size_needed);
2261                         esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2262                         gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2263                         tess_rings_needed |= cmd_buffer->tess_rings_needed;
2264                         sample_positions_needed |= cmd_buffer->sample_positions_needed;
2265                 }
2266         }
2267
2268         result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2269                                       esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2270                                       sample_positions_needed, &initial_flush_preamble_cs,
2271                                       &initial_preamble_cs, &continue_preamble_cs);
2272         if (result != VK_SUCCESS)
2273                 return result;
2274
2275         for (uint32_t i = 0; i < submitCount; i++) {
2276                 struct radeon_winsys_cs **cs_array;
2277                 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2278                 bool can_patch = true;
2279                 uint32_t advance;
2280                 struct radv_winsys_sem_info sem_info;
2281
2282                 result = radv_alloc_sem_info(&sem_info,
2283                                              pSubmits[i].waitSemaphoreCount,
2284                                              pSubmits[i].pWaitSemaphores,
2285                                              pSubmits[i].signalSemaphoreCount,
2286                                              pSubmits[i].pSignalSemaphores,
2287                                              _fence);
2288                 if (result != VK_SUCCESS)
2289                         return result;
2290
2291                 if (!pSubmits[i].commandBufferCount) {
2292                         if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2293                                 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2294                                                                    &queue->device->empty_cs[queue->queue_family_index],
2295                                                                    1, NULL, NULL,
2296                                                                    &sem_info,
2297                                                                    false, base_fence);
2298                                 if (ret) {
2299                                         radv_loge("failed to submit CS %d\n", i);
2300                                         abort();
2301                                 }
2302                                 fence_emitted = true;
2303                         }
2304                         radv_free_sem_info(&sem_info);
2305                         continue;
2306                 }
2307
2308                 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
2309                                                 (pSubmits[i].commandBufferCount));
2310
2311                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2312                         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2313                                          pSubmits[i].pCommandBuffers[j]);
2314                         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2315
2316                         cs_array[j] = cmd_buffer->cs;
2317                         if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2318                                 can_patch = false;
2319
2320                         cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2321                 }
2322
2323                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2324                         struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2325                         advance = MIN2(max_cs_submission,
2326                                        pSubmits[i].commandBufferCount - j);
2327
2328                         if (queue->device->trace_bo)
2329                                 *queue->device->trace_id_ptr = 0;
2330
2331                         sem_info.cs_emit_wait = j == 0;
2332                         sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2333
2334                         ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2335                                                         advance, initial_preamble, continue_preamble_cs,
2336                                                            &sem_info,
2337                                                         can_patch, base_fence);
2338
2339                         if (ret) {
2340                                 radv_loge("failed to submit CS %d\n", i);
2341                                 abort();
2342                         }
2343                         fence_emitted = true;
2344                         if (queue->device->trace_bo) {
2345                                 radv_check_gpu_hangs(queue, cs_array[j]);
2346                         }
2347                 }
2348
2349                 radv_free_temp_syncobjs(queue->device,
2350                                         pSubmits[i].waitSemaphoreCount,
2351                                         pSubmits[i].pWaitSemaphores);
2352                 radv_free_sem_info(&sem_info);
2353                 free(cs_array);
2354         }
2355
2356         if (fence) {
2357                 if (!fence_emitted) {
2358                         radv_signal_fence(queue, fence);
2359                 }
2360                 fence->submitted = true;
2361         }
2362
2363         return VK_SUCCESS;
2364 }
2365
2366 VkResult radv_QueueWaitIdle(
2367         VkQueue                                     _queue)
2368 {
2369         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2370
2371         queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2372                                          radv_queue_family_to_ring(queue->queue_family_index),
2373                                          queue->queue_idx);
2374         return VK_SUCCESS;
2375 }
2376
2377 VkResult radv_DeviceWaitIdle(
2378         VkDevice                                    _device)
2379 {
2380         RADV_FROM_HANDLE(radv_device, device, _device);
2381
2382         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2383                 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2384                         radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2385                 }
2386         }
2387         return VK_SUCCESS;
2388 }
2389
2390 VkResult radv_EnumerateInstanceExtensionProperties(
2391     const char*                                 pLayerName,
2392     uint32_t*                                   pPropertyCount,
2393     VkExtensionProperties*                      pProperties)
2394 {
2395         VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2396
2397         for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
2398                 if (radv_supported_instance_extensions.extensions[i]) {
2399                         vk_outarray_append(&out, prop) {
2400                                 *prop = radv_instance_extensions[i];
2401                         }
2402                 }
2403         }
2404
2405         return vk_outarray_status(&out);
2406 }
2407
2408 VkResult radv_EnumerateDeviceExtensionProperties(
2409     VkPhysicalDevice                            physicalDevice,
2410     const char*                                 pLayerName,
2411     uint32_t*                                   pPropertyCount,
2412     VkExtensionProperties*                      pProperties)
2413 {
2414         RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2415         VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2416
2417         for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
2418                 if (device->supported_extensions.extensions[i]) {
2419                         vk_outarray_append(&out, prop) {
2420                                 *prop = radv_device_extensions[i];
2421                         }
2422                 }
2423         }
2424
2425         return vk_outarray_status(&out);
2426 }
2427
2428 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2429         VkInstance                                  _instance,
2430         const char*                                 pName)
2431 {
2432         RADV_FROM_HANDLE(radv_instance, instance, _instance);
2433
2434         return radv_lookup_entrypoint_checked(pName,
2435                                               instance ? instance->apiVersion : 0,
2436                                               instance ? &instance->enabled_extensions : NULL,
2437                                               NULL);
2438 }
2439
2440 /* The loader wants us to expose a second GetInstanceProcAddr function
2441  * to work around certain LD_PRELOAD issues seen in apps.
2442  */
2443 PUBLIC
2444 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2445         VkInstance                                  instance,
2446         const char*                                 pName);
2447
2448 PUBLIC
2449 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2450         VkInstance                                  instance,
2451         const char*                                 pName)
2452 {
2453         return radv_GetInstanceProcAddr(instance, pName);
2454 }
2455
2456 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2457         VkDevice                                    _device,
2458         const char*                                 pName)
2459 {
2460         RADV_FROM_HANDLE(radv_device, device, _device);
2461
2462         return radv_lookup_entrypoint_checked(pName,
2463                                               device->instance->apiVersion,
2464                                               &device->instance->enabled_extensions,
2465                                               &device->enabled_extensions);
2466 }
2467
2468 bool radv_get_memory_fd(struct radv_device *device,
2469                         struct radv_device_memory *memory,
2470                         int *pFD)
2471 {
2472         struct radeon_bo_metadata metadata;
2473
2474         if (memory->image) {
2475                 radv_init_metadata(device, memory->image, &metadata);
2476                 device->ws->buffer_set_metadata(memory->bo, &metadata);
2477         }
2478
2479         return device->ws->buffer_get_fd(device->ws, memory->bo,
2480                                          pFD);
2481 }
2482
2483 static VkResult radv_alloc_memory(struct radv_device *device,
2484                                   const VkMemoryAllocateInfo*     pAllocateInfo,
2485                                   const VkAllocationCallbacks*    pAllocator,
2486                                   VkDeviceMemory*                 pMem)
2487 {
2488         struct radv_device_memory *mem;
2489         VkResult result;
2490         enum radeon_bo_domain domain;
2491         uint32_t flags = 0;
2492         enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2493
2494         assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2495
2496         if (pAllocateInfo->allocationSize == 0) {
2497                 /* Apparently, this is allowed */
2498                 *pMem = VK_NULL_HANDLE;
2499                 return VK_SUCCESS;
2500         }
2501
2502         const VkImportMemoryFdInfoKHR *import_info =
2503                 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2504         const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2505                 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2506         const VkExportMemoryAllocateInfoKHR *export_info =
2507                 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
2508         const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
2509                 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
2510
2511         const struct wsi_memory_allocate_info *wsi_info =
2512                 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2513
2514         mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2515                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2516         if (mem == NULL)
2517                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2518
2519         if (wsi_info && wsi_info->implicit_sync)
2520                 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2521
2522         if (dedicate_info) {
2523                 mem->image = radv_image_from_handle(dedicate_info->image);
2524                 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2525         } else {
2526                 mem->image = NULL;
2527                 mem->buffer = NULL;
2528         }
2529
2530         mem->user_ptr = NULL;
2531
2532         if (import_info) {
2533                 assert(import_info->handleType ==
2534                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2535                        import_info->handleType ==
2536                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2537                 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2538                                                      NULL, NULL);
2539                 if (!mem->bo) {
2540                         result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2541                         goto fail;
2542                 } else {
2543                         close(import_info->fd);
2544                         goto out_success;
2545                 }
2546         }
2547
2548         if (host_ptr_info) {
2549                 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
2550                 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
2551                 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
2552                                                       pAllocateInfo->allocationSize);
2553                 if (!mem->bo) {
2554                         result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2555                         goto fail;
2556                 } else {
2557                         mem->user_ptr = host_ptr_info->pHostPointer;
2558                         goto out_success;
2559                 }
2560         }
2561
2562         uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2563         if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2564             mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2565                 domain = RADEON_DOMAIN_GTT;
2566         else
2567                 domain = RADEON_DOMAIN_VRAM;
2568
2569         if (mem_type_index == RADV_MEM_TYPE_VRAM)
2570                 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2571         else
2572                 flags |= RADEON_FLAG_CPU_ACCESS;
2573
2574         if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2575                 flags |= RADEON_FLAG_GTT_WC;
2576
2577         if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
2578                 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2579
2580         mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2581                                                domain, flags);
2582
2583         if (!mem->bo) {
2584                 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2585                 goto fail;
2586         }
2587         mem->type_index = mem_type_index;
2588 out_success:
2589         *pMem = radv_device_memory_to_handle(mem);
2590
2591         return VK_SUCCESS;
2592
2593 fail:
2594         vk_free2(&device->alloc, pAllocator, mem);
2595
2596         return result;
2597 }
2598
2599 VkResult radv_AllocateMemory(
2600         VkDevice                                    _device,
2601         const VkMemoryAllocateInfo*                 pAllocateInfo,
2602         const VkAllocationCallbacks*                pAllocator,
2603         VkDeviceMemory*                             pMem)
2604 {
2605         RADV_FROM_HANDLE(radv_device, device, _device);
2606         return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
2607 }
2608
2609 void radv_FreeMemory(
2610         VkDevice                                    _device,
2611         VkDeviceMemory                              _mem,
2612         const VkAllocationCallbacks*                pAllocator)
2613 {
2614         RADV_FROM_HANDLE(radv_device, device, _device);
2615         RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2616
2617         if (mem == NULL)
2618                 return;
2619
2620         device->ws->buffer_destroy(mem->bo);
2621         mem->bo = NULL;
2622
2623         vk_free2(&device->alloc, pAllocator, mem);
2624 }
2625
2626 VkResult radv_MapMemory(
2627         VkDevice                                    _device,
2628         VkDeviceMemory                              _memory,
2629         VkDeviceSize                                offset,
2630         VkDeviceSize                                size,
2631         VkMemoryMapFlags                            flags,
2632         void**                                      ppData)
2633 {
2634         RADV_FROM_HANDLE(radv_device, device, _device);
2635         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2636
2637         if (mem == NULL) {
2638                 *ppData = NULL;
2639                 return VK_SUCCESS;
2640         }
2641
2642         if (mem->user_ptr)
2643                 *ppData = mem->user_ptr;
2644         else
2645                 *ppData = device->ws->buffer_map(mem->bo);
2646
2647         if (*ppData) {
2648                 *ppData += offset;
2649                 return VK_SUCCESS;
2650         }
2651
2652         return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
2653 }
2654
2655 void radv_UnmapMemory(
2656         VkDevice                                    _device,
2657         VkDeviceMemory                              _memory)
2658 {
2659         RADV_FROM_HANDLE(radv_device, device, _device);
2660         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2661
2662         if (mem == NULL)
2663                 return;
2664
2665         if (mem->user_ptr == NULL)
2666                 device->ws->buffer_unmap(mem->bo);
2667 }
2668
2669 VkResult radv_FlushMappedMemoryRanges(
2670         VkDevice                                    _device,
2671         uint32_t                                    memoryRangeCount,
2672         const VkMappedMemoryRange*                  pMemoryRanges)
2673 {
2674         return VK_SUCCESS;
2675 }
2676
2677 VkResult radv_InvalidateMappedMemoryRanges(
2678         VkDevice                                    _device,
2679         uint32_t                                    memoryRangeCount,
2680         const VkMappedMemoryRange*                  pMemoryRanges)
2681 {
2682         return VK_SUCCESS;
2683 }
2684
2685 void radv_GetBufferMemoryRequirements(
2686         VkDevice                                    _device,
2687         VkBuffer                                    _buffer,
2688         VkMemoryRequirements*                       pMemoryRequirements)
2689 {
2690         RADV_FROM_HANDLE(radv_device, device, _device);
2691         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2692
2693         pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2694
2695         if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2696                 pMemoryRequirements->alignment = 4096;
2697         else
2698                 pMemoryRequirements->alignment = 16;
2699
2700         pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2701 }
2702
2703 void radv_GetBufferMemoryRequirements2(
2704         VkDevice                                     device,
2705         const VkBufferMemoryRequirementsInfo2KHR*    pInfo,
2706         VkMemoryRequirements2KHR*                    pMemoryRequirements)
2707 {
2708         radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2709                                         &pMemoryRequirements->memoryRequirements);
2710         RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2711         vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2712                 switch (ext->sType) {
2713                 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2714                         VkMemoryDedicatedRequirementsKHR *req =
2715                                        (VkMemoryDedicatedRequirementsKHR *) ext;
2716                         req->requiresDedicatedAllocation = buffer->shareable;
2717                         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2718                         break;
2719                 }
2720                 default:
2721                         break;
2722                 }
2723         }
2724 }
2725
2726 void radv_GetImageMemoryRequirements(
2727         VkDevice                                    _device,
2728         VkImage                                     _image,
2729         VkMemoryRequirements*                       pMemoryRequirements)
2730 {
2731         RADV_FROM_HANDLE(radv_device, device, _device);
2732         RADV_FROM_HANDLE(radv_image, image, _image);
2733
2734         pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2735
2736         pMemoryRequirements->size = image->size;
2737         pMemoryRequirements->alignment = image->alignment;
2738 }
2739
2740 void radv_GetImageMemoryRequirements2(
2741         VkDevice                                    device,
2742         const VkImageMemoryRequirementsInfo2KHR*    pInfo,
2743         VkMemoryRequirements2KHR*                   pMemoryRequirements)
2744 {
2745         radv_GetImageMemoryRequirements(device, pInfo->image,
2746                                         &pMemoryRequirements->memoryRequirements);
2747
2748         RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2749
2750         vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2751                 switch (ext->sType) {
2752                 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2753                         VkMemoryDedicatedRequirementsKHR *req =
2754                                        (VkMemoryDedicatedRequirementsKHR *) ext;
2755                         req->requiresDedicatedAllocation = image->shareable;
2756                         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2757                         break;
2758                 }
2759                 default:
2760                         break;
2761                 }
2762         }
2763 }
2764
2765 void radv_GetImageSparseMemoryRequirements(
2766         VkDevice                                    device,
2767         VkImage                                     image,
2768         uint32_t*                                   pSparseMemoryRequirementCount,
2769         VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
2770 {
2771         stub();
2772 }
2773
2774 void radv_GetImageSparseMemoryRequirements2(
2775         VkDevice                                    device,
2776         const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2777         uint32_t*                                   pSparseMemoryRequirementCount,
2778         VkSparseImageMemoryRequirements2KHR*            pSparseMemoryRequirements)
2779 {
2780         stub();
2781 }
2782
2783 void radv_GetDeviceMemoryCommitment(
2784         VkDevice                                    device,
2785         VkDeviceMemory                              memory,
2786         VkDeviceSize*                               pCommittedMemoryInBytes)
2787 {
2788         *pCommittedMemoryInBytes = 0;
2789 }
2790
2791 VkResult radv_BindBufferMemory2(VkDevice device,
2792                                 uint32_t bindInfoCount,
2793                                 const VkBindBufferMemoryInfoKHR *pBindInfos)
2794 {
2795         for (uint32_t i = 0; i < bindInfoCount; ++i) {
2796                 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2797                 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2798
2799                 if (mem) {
2800                         buffer->bo = mem->bo;
2801                         buffer->offset = pBindInfos[i].memoryOffset;
2802                 } else {
2803                         buffer->bo = NULL;
2804                 }
2805         }
2806         return VK_SUCCESS;
2807 }
2808
2809 VkResult radv_BindBufferMemory(
2810         VkDevice                                    device,
2811         VkBuffer                                    buffer,
2812         VkDeviceMemory                              memory,
2813         VkDeviceSize                                memoryOffset)
2814 {
2815         const VkBindBufferMemoryInfoKHR info = {
2816                 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2817                 .buffer = buffer,
2818                 .memory = memory,
2819                 .memoryOffset = memoryOffset
2820         };
2821
2822         return radv_BindBufferMemory2(device, 1, &info);
2823 }
2824
2825 VkResult radv_BindImageMemory2(VkDevice device,
2826                                uint32_t bindInfoCount,
2827                                const VkBindImageMemoryInfoKHR *pBindInfos)
2828 {
2829         for (uint32_t i = 0; i < bindInfoCount; ++i) {
2830                 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2831                 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2832
2833                 if (mem) {
2834                         image->bo = mem->bo;
2835                         image->offset = pBindInfos[i].memoryOffset;
2836                 } else {
2837                         image->bo = NULL;
2838                         image->offset = 0;
2839                 }
2840         }
2841         return VK_SUCCESS;
2842 }
2843
2844
2845 VkResult radv_BindImageMemory(
2846         VkDevice                                    device,
2847         VkImage                                     image,
2848         VkDeviceMemory                              memory,
2849         VkDeviceSize                                memoryOffset)
2850 {
2851         const VkBindImageMemoryInfoKHR info = {
2852                 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2853                 .image = image,
2854                 .memory = memory,
2855                 .memoryOffset = memoryOffset
2856         };
2857
2858         return radv_BindImageMemory2(device, 1, &info);
2859 }
2860
2861
2862 static void
2863 radv_sparse_buffer_bind_memory(struct radv_device *device,
2864                                const VkSparseBufferMemoryBindInfo *bind)
2865 {
2866         RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2867
2868         for (uint32_t i = 0; i < bind->bindCount; ++i) {
2869                 struct radv_device_memory *mem = NULL;
2870
2871                 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2872                         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2873
2874                 device->ws->buffer_virtual_bind(buffer->bo,
2875                                                 bind->pBinds[i].resourceOffset,
2876                                                 bind->pBinds[i].size,
2877                                                 mem ? mem->bo : NULL,
2878                                                 bind->pBinds[i].memoryOffset);
2879         }
2880 }
2881
2882 static void
2883 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2884                                      const VkSparseImageOpaqueMemoryBindInfo *bind)
2885 {
2886         RADV_FROM_HANDLE(radv_image, image, bind->image);
2887
2888         for (uint32_t i = 0; i < bind->bindCount; ++i) {
2889                 struct radv_device_memory *mem = NULL;
2890
2891                 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2892                         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2893
2894                 device->ws->buffer_virtual_bind(image->bo,
2895                                                 bind->pBinds[i].resourceOffset,
2896                                                 bind->pBinds[i].size,
2897                                                 mem ? mem->bo : NULL,
2898                                                 bind->pBinds[i].memoryOffset);
2899         }
2900 }
2901
2902  VkResult radv_QueueBindSparse(
2903         VkQueue                                     _queue,
2904         uint32_t                                    bindInfoCount,
2905         const VkBindSparseInfo*                     pBindInfo,
2906         VkFence                                     _fence)
2907 {
2908         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2909         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2910         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2911         bool fence_emitted = false;
2912
2913         for (uint32_t i = 0; i < bindInfoCount; ++i) {
2914                 struct radv_winsys_sem_info sem_info;
2915                 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2916                         radv_sparse_buffer_bind_memory(queue->device,
2917                                                        pBindInfo[i].pBufferBinds + j);
2918                 }
2919
2920                 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2921                         radv_sparse_image_opaque_bind_memory(queue->device,
2922                                                              pBindInfo[i].pImageOpaqueBinds + j);
2923                 }
2924
2925                 VkResult result;
2926                 result = radv_alloc_sem_info(&sem_info,
2927                                              pBindInfo[i].waitSemaphoreCount,
2928                                              pBindInfo[i].pWaitSemaphores,
2929                                              pBindInfo[i].signalSemaphoreCount,
2930                                              pBindInfo[i].pSignalSemaphores,
2931                                              _fence);
2932                 if (result != VK_SUCCESS)
2933                         return result;
2934
2935                 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2936                         queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2937                                                      &queue->device->empty_cs[queue->queue_family_index],
2938                                                      1, NULL, NULL,
2939                                                      &sem_info,
2940                                                      false, base_fence);
2941                         fence_emitted = true;
2942                         if (fence)
2943                                 fence->submitted = true;
2944                 }
2945
2946                 radv_free_sem_info(&sem_info);
2947
2948         }
2949
2950         if (fence) {
2951                 if (!fence_emitted) {
2952                         radv_signal_fence(queue, fence);
2953                 }
2954                 fence->submitted = true;
2955         }
2956
2957         return VK_SUCCESS;
2958 }
2959
2960 VkResult radv_CreateFence(
2961         VkDevice                                    _device,
2962         const VkFenceCreateInfo*                    pCreateInfo,
2963         const VkAllocationCallbacks*                pAllocator,
2964         VkFence*                                    pFence)
2965 {
2966         RADV_FROM_HANDLE(radv_device, device, _device);
2967         const VkExportFenceCreateInfoKHR *export =
2968                 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
2969         VkExternalFenceHandleTypeFlagsKHR handleTypes =
2970                 export ? export->handleTypes : 0;
2971
2972         struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2973                                                sizeof(*fence), 8,
2974                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2975
2976         if (!fence)
2977                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2978
2979         fence->submitted = false;
2980         fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2981         fence->temp_syncobj = 0;
2982         if (device->always_use_syncobj || handleTypes) {
2983                 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
2984                 if (ret) {
2985                         vk_free2(&device->alloc, pAllocator, fence);
2986                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2987                 }
2988                 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
2989                         device->ws->signal_syncobj(device->ws, fence->syncobj);
2990                 }
2991                 fence->fence = NULL;
2992         } else {
2993                 fence->fence = device->ws->create_fence();
2994                 if (!fence->fence) {
2995                         vk_free2(&device->alloc, pAllocator, fence);
2996                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2997                 }
2998                 fence->syncobj = 0;
2999         }
3000
3001         *pFence = radv_fence_to_handle(fence);
3002
3003         return VK_SUCCESS;
3004 }
3005
3006 void radv_DestroyFence(
3007         VkDevice                                    _device,
3008         VkFence                                     _fence,
3009         const VkAllocationCallbacks*                pAllocator)
3010 {
3011         RADV_FROM_HANDLE(radv_device, device, _device);
3012         RADV_FROM_HANDLE(radv_fence, fence, _fence);
3013
3014         if (!fence)
3015                 return;
3016
3017         if (fence->temp_syncobj)
3018                 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3019         if (fence->syncobj)
3020                 device->ws->destroy_syncobj(device->ws, fence->syncobj);
3021         if (fence->fence)
3022                 device->ws->destroy_fence(fence->fence);
3023         vk_free2(&device->alloc, pAllocator, fence);
3024 }
3025
3026
3027 static uint64_t radv_get_current_time()
3028 {
3029         struct timespec tv;
3030         clock_gettime(CLOCK_MONOTONIC, &tv);
3031         return tv.tv_nsec + tv.tv_sec*1000000000ull;
3032 }
3033
/**
 * Turn a relative timeout (nanoseconds) into an absolute deadline on the
 * clock used by radv_get_current_time().
 *
 * The relative value is clamped so that adding it to the current time
 * cannot wrap past UINT64_MAX.
 */
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
        const uint64_t now = radv_get_current_time();
        const uint64_t headroom = UINT64_MAX - now;

        timeout = MIN2(headroom, timeout);

        return now + timeout;
}
3042
3043
3044 static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
3045 {
3046         for (uint32_t i = 0; i < fenceCount; ++i) {
3047                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3048                 if (fence->syncobj || fence->temp_syncobj || (!fence->signalled && !fence->submitted))
3049                         return false;
3050         }
3051         return true;
3052 }
3053
3054 VkResult radv_WaitForFences(
3055         VkDevice                                    _device,
3056         uint32_t                                    fenceCount,
3057         const VkFence*                              pFences,
3058         VkBool32                                    waitAll,
3059         uint64_t                                    timeout)
3060 {
3061         RADV_FROM_HANDLE(radv_device, device, _device);
3062         timeout = radv_get_absolute_timeout(timeout);
3063
3064         if (device->always_use_syncobj) {
3065                 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3066                 if (!handles)
3067                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3068
3069                 for (uint32_t i = 0; i < fenceCount; ++i) {
3070                         RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3071                         handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3072                 }
3073
3074                 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3075
3076                 free(handles);
3077                 return success ? VK_SUCCESS : VK_TIMEOUT;
3078         }
3079
3080         if (!waitAll && fenceCount > 1) {
3081                 /* Not doing this by default for waitAll, due to needing to allocate twice. */
3082                 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
3083                         uint32_t wait_count = 0;
3084                         struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
3085                         if (!fences)
3086                                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3087
3088                         for (uint32_t i = 0; i < fenceCount; ++i) {
3089                                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3090
3091                                 if (fence->signalled) {
3092                                         free(fences);
3093                                         return VK_SUCCESS;
3094                                 }
3095
3096                                 fences[wait_count++] = fence->fence;
3097                         }
3098
3099                         bool success = device->ws->fences_wait(device->ws, fences, wait_count,
3100                                                                waitAll, timeout - radv_get_current_time());
3101
3102                         free(fences);
3103                         return success ? VK_SUCCESS : VK_TIMEOUT;
3104                 }
3105
3106                 while(radv_get_current_time() <= timeout) {
3107                         for (uint32_t i = 0; i < fenceCount; ++i) {
3108                                 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
3109                                         return VK_SUCCESS;
3110                         }
3111                 }
3112                 return VK_TIMEOUT;
3113         }
3114
3115         for (uint32_t i = 0; i < fenceCount; ++i) {
3116                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3117                 bool expired = false;
3118
3119                 if (fence->temp_syncobj) {
3120                         if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
3121                                 return VK_TIMEOUT;
3122                         continue;
3123                 }
3124
3125                 if (fence->syncobj) {
3126                         if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
3127                                 return VK_TIMEOUT;
3128                         continue;
3129                 }
3130
3131                 if (fence->signalled)
3132                         continue;
3133
3134                 if (!fence->submitted) {
3135                         while(radv_get_current_time() <= timeout && !fence->submitted)
3136                                 /* Do nothing */;
3137
3138                         if (!fence->submitted)
3139                                 return VK_TIMEOUT;
3140
3141                         /* Recheck as it may have been set by submitting operations. */
3142                         if (fence->signalled)
3143                                 continue;
3144                 }
3145
3146                 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
3147                 if (!expired)
3148                         return VK_TIMEOUT;
3149
3150                 fence->signalled = true;
3151         }
3152
3153         return VK_SUCCESS;
3154 }
3155
3156 VkResult radv_ResetFences(VkDevice _device,
3157                           uint32_t fenceCount,
3158                           const VkFence *pFences)
3159 {
3160         RADV_FROM_HANDLE(radv_device, device, _device);
3161
3162         for (unsigned i = 0; i < fenceCount; ++i) {
3163                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3164                 fence->submitted = fence->signalled = false;
3165
3166                 /* Per spec, we first restore the permanent payload, and then reset, so
3167                  * having a temp syncobj should not skip resetting the permanent syncobj. */
3168                 if (fence->temp_syncobj) {
3169                         device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3170                         fence->temp_syncobj = 0;
3171                 }
3172
3173                 if (fence->syncobj) {
3174                         device->ws->reset_syncobj(device->ws, fence->syncobj);
3175                 }
3176         }
3177
3178         return VK_SUCCESS;
3179 }
3180
3181 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
3182 {
3183         RADV_FROM_HANDLE(radv_device, device, _device);
3184         RADV_FROM_HANDLE(radv_fence, fence, _fence);
3185
3186         if (fence->temp_syncobj) {
3187                         bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
3188                         return success ? VK_SUCCESS : VK_NOT_READY;
3189         }
3190
3191         if (fence->syncobj) {
3192                         bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
3193                         return success ? VK_SUCCESS : VK_NOT_READY;
3194         }
3195
3196         if (fence->signalled)
3197                 return VK_SUCCESS;
3198         if (!fence->submitted)
3199                 return VK_NOT_READY;
3200         if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3201                 return VK_NOT_READY;
3202
3203         return VK_SUCCESS;
3204 }
3205
3206
3207 // Queue semaphore functions
3208
3209 VkResult radv_CreateSemaphore(
3210         VkDevice                                    _device,
3211         const VkSemaphoreCreateInfo*                pCreateInfo,
3212         const VkAllocationCallbacks*                pAllocator,
3213         VkSemaphore*                                pSemaphore)
3214 {
3215         RADV_FROM_HANDLE(radv_device, device, _device);
3216         const VkExportSemaphoreCreateInfoKHR *export =
3217                 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
3218         VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
3219                 export ? export->handleTypes : 0;
3220
3221         struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3222                                                sizeof(*sem), 8,
3223                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3224         if (!sem)
3225                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3226
3227         sem->temp_syncobj = 0;
3228         /* create a syncobject if we are going to export this semaphore */
3229         if (device->always_use_syncobj || handleTypes) {
3230                 assert (device->physical_device->rad_info.has_syncobj);
3231                 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3232                 if (ret) {
3233                         vk_free2(&device->alloc, pAllocator, sem);
3234                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3235                 }
3236                 sem->sem = NULL;
3237         } else {
3238                 sem->sem = device->ws->create_sem(device->ws);
3239                 if (!sem->sem) {
3240                         vk_free2(&device->alloc, pAllocator, sem);
3241                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3242                 }
3243                 sem->syncobj = 0;
3244         }
3245
3246         *pSemaphore = radv_semaphore_to_handle(sem);
3247         return VK_SUCCESS;
3248 }
3249
3250 void radv_DestroySemaphore(
3251         VkDevice                                    _device,
3252         VkSemaphore                                 _semaphore,
3253         const VkAllocationCallbacks*                pAllocator)
3254 {
3255         RADV_FROM_HANDLE(radv_device, device, _device);
3256         RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
3257         if (!_semaphore)
3258                 return;
3259
3260         if (sem->syncobj)
3261                 device->ws->destroy_syncobj(device->ws, sem->syncobj);
3262         else
3263                 device->ws->destroy_sem(sem->sem);
3264         vk_free2(&device->alloc, pAllocator, sem);
3265 }
3266
3267 VkResult radv_CreateEvent(
3268         VkDevice                                    _device,
3269         const VkEventCreateInfo*                    pCreateInfo,
3270         const VkAllocationCallbacks*                pAllocator,
3271         VkEvent*                                    pEvent)
3272 {
3273         RADV_FROM_HANDLE(radv_device, device, _device);
3274         struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
3275                                                sizeof(*event), 8,
3276                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3277
3278         if (!event)
3279                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3280
3281         event->bo = device->ws->buffer_create(device->ws, 8, 8,
3282                                               RADEON_DOMAIN_GTT,
3283                                               RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
3284         if (!event->bo) {
3285                 vk_free2(&device->alloc, pAllocator, event);
3286                 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3287         }
3288
3289         event->map = (uint64_t*)device->ws->buffer_map(event->bo);
3290
3291         *pEvent = radv_event_to_handle(event);
3292
3293         return VK_SUCCESS;
3294 }
3295
3296 void radv_DestroyEvent(
3297         VkDevice                                    _device,
3298         VkEvent                                     _event,
3299         const VkAllocationCallbacks*                pAllocator)
3300 {
3301         RADV_FROM_HANDLE(radv_device, device, _device);
3302         RADV_FROM_HANDLE(radv_event, event, _event);
3303
3304         if (!event)
3305                 return;
3306         device->ws->buffer_destroy(event->bo);
3307         vk_free2(&device->alloc, pAllocator, event);
3308 }
3309
3310 VkResult radv_GetEventStatus(
3311         VkDevice                                    _device,
3312         VkEvent                                     _event)
3313 {
3314         RADV_FROM_HANDLE(radv_event, event, _event);
3315
3316         if (*event->map == 1)
3317                 return VK_EVENT_SET;
3318         return VK_EVENT_RESET;
3319 }
3320
3321 VkResult radv_SetEvent(
3322         VkDevice                                    _device,
3323         VkEvent                                     _event)
3324 {
3325         RADV_FROM_HANDLE(radv_event, event, _event);
3326         *event->map = 1;
3327
3328         return VK_SUCCESS;
3329 }
3330
3331 VkResult radv_ResetEvent(
3332     VkDevice                                    _device,
3333     VkEvent                                     _event)
3334 {
3335         RADV_FROM_HANDLE(radv_event, event, _event);
3336         *event->map = 0;
3337
3338         return VK_SUCCESS;
3339 }
3340
3341 VkResult radv_CreateBuffer(
3342         VkDevice                                    _device,
3343         const VkBufferCreateInfo*                   pCreateInfo,
3344         const VkAllocationCallbacks*                pAllocator,
3345         VkBuffer*                                   pBuffer)
3346 {
3347         RADV_FROM_HANDLE(radv_device, device, _device);
3348         struct radv_buffer *buffer;
3349
3350         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
3351
3352         buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
3353                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3354         if (buffer == NULL)
3355                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3356
3357         buffer->size = pCreateInfo->size;
3358         buffer->usage = pCreateInfo->usage;
3359         buffer->bo = NULL;
3360         buffer->offset = 0;
3361         buffer->flags = pCreateInfo->flags;
3362
3363         buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
3364                                                  EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
3365
3366         if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
3367                 buffer->bo = device->ws->buffer_create(device->ws,
3368                                                        align64(buffer->size, 4096),
3369                                                        4096, 0, RADEON_FLAG_VIRTUAL);
3370                 if (!buffer->bo) {
3371                         vk_free2(&device->alloc, pAllocator, buffer);
3372                         return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3373                 }
3374         }
3375
3376         *pBuffer = radv_buffer_to_handle(buffer);
3377
3378         return VK_SUCCESS;
3379 }
3380
3381 void radv_DestroyBuffer(
3382         VkDevice                                    _device,
3383         VkBuffer                                    _buffer,
3384         const VkAllocationCallbacks*                pAllocator)
3385 {
3386         RADV_FROM_HANDLE(radv_device, device, _device);
3387         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3388
3389         if (!buffer)
3390                 return;
3391
3392         if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3393                 device->ws->buffer_destroy(buffer->bo);
3394
3395         vk_free2(&device->alloc, pAllocator, buffer);
3396 }
3397
3398 static inline unsigned
3399 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3400 {
3401         if (stencil)
3402                 return image->surface.u.legacy.stencil_tiling_index[level];
3403         else
3404                 return image->surface.u.legacy.tiling_index[level];
3405 }
3406
3407 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
3408 {
3409         return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
3410 }
3411
3412 static void
3413 radv_initialise_color_surface(struct radv_device *device,
3414                               struct radv_color_buffer_info *cb,
3415                               struct radv_image_view *iview)
3416 {
3417         const struct vk_format_description *desc;
3418         unsigned ntype, format, swap, endian;
3419         unsigned blend_clamp = 0, blend_bypass = 0;
3420         uint64_t va;
3421         const struct radeon_surf *surf = &iview->image->surface;
3422
3423         desc = vk_format_description(iview->vk_format);
3424
3425         memset(cb, 0, sizeof(*cb));
3426
3427         /* Intensity is implemented as Red, so treat it that way. */
3428         cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3429
3430         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3431
3432         cb->cb_color_base = va >> 8;
3433
3434         if (device->physical_device->rad_info.chip_class >= GFX9) {
3435                 struct gfx9_surf_meta_flags meta;
3436                 if (iview->image->dcc_offset)
3437                         meta = iview->image->surface.u.gfx9.dcc;
3438                 else
3439                         meta = iview->image->surface.u.gfx9.cmask;
3440
3441                 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3442                         S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
3443                         S_028C74_RB_ALIGNED(meta.rb_aligned) |
3444                         S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3445
3446                 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
3447                 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3448         } else {
3449                 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
3450                 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3451
3452                 cb->cb_color_base += level_info->offset >> 8;
3453                 if (level_info->mode == RADEON_SURF_MODE_2D)
3454                         cb->cb_color_base |= iview->image->surface.tile_swizzle;
3455
3456                 pitch_tile_max = level_info->nblk_x / 8 - 1;
3457                 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
3458                 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
3459
3460                 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3461                 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3462                 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
3463
3464                 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3465
3466                 if (radv_image_has_fmask(iview->image)) {
3467                         if (device->physical_device->rad_info.chip_class >= CIK)
3468                                 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
3469                         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
3470                         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
3471                 } else {
3472                         /* This must be set for fast clear to work without FMASK. */
3473                         if (device->physical_device->rad_info.chip_class >= CIK)
3474                                 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3475                         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3476                         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3477                 }
3478         }
3479
3480         /* CMASK variables */
3481         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3482         va += iview->image->cmask.offset;
3483         cb->cb_color_cmask = va >> 8;
3484
3485         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3486         va += iview->image->dcc_offset;
3487         cb->cb_dcc_base = va >> 8;
3488         cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
3489
3490         uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3491         cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
3492                 S_028C6C_SLICE_MAX(max_slice);
3493
3494         if (iview->image->info.samples > 1) {
3495                 unsigned log_samples = util_logbase2(iview->image->info.samples);
3496
3497                 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
3498                         S_028C74_NUM_FRAGMENTS(log_samples);
3499         }
3500
3501         if (radv_image_has_fmask(iview->image)) {
3502                 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
3503                 cb->cb_color_fmask = va >> 8;
3504                 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
3505         } else {
3506                 cb->cb_color_fmask = cb->cb_color_base;
3507         }
3508
3509         ntype = radv_translate_color_numformat(iview->vk_format,
3510                                                desc,
3511                                                vk_format_get_first_non_void_channel(iview->vk_format));
3512         format = radv_translate_colorformat(iview->vk_format);
3513         if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3514                 radv_finishme("Illegal color\n");
3515         swap = radv_translate_colorswap(iview->vk_format, FALSE);
3516         endian = radv_colorformat_endian_swap(format);
3517
3518         /* blend clamp should be set for all NORM/SRGB types */
3519         if (ntype == V_028C70_NUMBER_UNORM ||
3520             ntype == V_028C70_NUMBER_SNORM ||
3521             ntype == V_028C70_NUMBER_SRGB)
3522                 blend_clamp = 1;
3523
3524         /* set blend bypass according to docs if SINT/UINT or
3525            8/24 COLOR variants */
3526         if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3527             format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3528             format == V_028C70_COLOR_X24_8_32_FLOAT) {
3529                 blend_clamp = 0;
3530                 blend_bypass = 1;
3531         }
3532 #if 0
3533         if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3534             (format == V_028C70_COLOR_8 ||
3535              format == V_028C70_COLOR_8_8 ||
3536              format == V_028C70_COLOR_8_8_8_8))
3537                 ->color_is_int8 = true;
3538 #endif
3539         cb->cb_color_info = S_028C70_FORMAT(format) |
3540                 S_028C70_COMP_SWAP(swap) |
3541                 S_028C70_BLEND_CLAMP(blend_clamp) |
3542                 S_028C70_BLEND_BYPASS(blend_bypass) |
3543                 S_028C70_SIMPLE_FLOAT(1) |
3544                 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3545                                     ntype != V_028C70_NUMBER_SNORM &&
3546                                     ntype != V_028C70_NUMBER_SRGB &&
3547                                     format != V_028C70_COLOR_8_24 &&
3548                                     format != V_028C70_COLOR_24_8) |
3549                 S_028C70_NUMBER_TYPE(ntype) |
3550                 S_028C70_ENDIAN(endian);
3551         if ((iview->image->info.samples > 1) && radv_image_has_fmask(iview->image)) {
3552                 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3553                 if (device->physical_device->rad_info.chip_class == SI) {
3554                         unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3555                         cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3556                 }
3557         }
3558
3559         if (radv_image_has_cmask(iview->image) &&
3560             !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3561                 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3562
3563         if (radv_dcc_enabled(iview->image, iview->base_mip))
3564                 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3565
3566         if (device->physical_device->rad_info.chip_class >= VI) {
3567                 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3568                 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
3569                 unsigned independent_64b_blocks = 0;
3570                 unsigned max_compressed_block_size;
3571
3572                 /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
3573                    64 for APU because all of our APUs to date use DIMMs which have
3574                    a request granularity size of 64B while all other chips have a
3575                    32B request size */
3576                 if (!device->physical_device->rad_info.has_dedicated_vram)
3577                         min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
3578
3579                 if (iview->image->info.samples > 1) {
3580                         if (iview->image->surface.bpe == 1)
3581                                 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3582                         else if (iview->image->surface.bpe == 2)
3583                                 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
3584                 }
3585
3586                 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3587                                            VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
3588                         independent_64b_blocks = 1;
3589                         max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3590                 } else
3591                         max_compressed_block_size = max_uncompressed_block_size;
3592
3593                 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3594                         S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
3595                         S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
3596                         S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
3597         }
3598
3599         /* This must be set for fast clear to work without FMASK. */
3600         if (!radv_image_has_fmask(iview->image) &&
3601             device->physical_device->rad_info.chip_class == SI) {
3602                 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3603                 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3604         }
3605
3606         if (device->physical_device->rad_info.chip_class >= GFX9) {
3607                 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3608                   (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3609
3610                 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3611                 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3612                         S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3613                 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3614                         S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3615                         S_028C68_MAX_MIP(iview->image->info.levels - 1);
3616         }
3617 }
3618
3619 static unsigned
3620 radv_calc_decompress_on_z_planes(struct radv_device *device,
3621                                  struct radv_image_view *iview)
3622 {
3623         unsigned max_zplanes = 0;
3624
3625         assert(iview->image->tc_compatible_htile);
3626
3627         if (device->physical_device->rad_info.chip_class >= GFX9) {
3628                 /* Default value for 32-bit depth surfaces. */
3629                 max_zplanes = 4;
3630
3631                 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3632                     iview->image->info.samples > 1)
3633                         max_zplanes = 2;
3634
3635                 max_zplanes = max_zplanes + 1;
3636         } else {
3637                 if (iview->vk_format == VK_FORMAT_D16_UNORM) {
3638                         /* Do not enable Z plane compression for 16-bit depth
3639                          * surfaces because isn't supported on GFX8. Only
3640                          * 32-bit depth surfaces are supported by the hardware.
3641                          * This allows to maintain shader compatibility and to
3642                          * reduce the number of depth decompressions.
3643                          */
3644                         max_zplanes = 1;
3645                 } else {
3646                         if (iview->image->info.samples <= 1)
3647                                 max_zplanes = 5;
3648                         else if (iview->image->info.samples <= 4)
3649                                 max_zplanes = 3;
3650                         else
3651                                 max_zplanes = 2;
3652                 }
3653         }
3654
3655         return max_zplanes;
3656 }
3657
3658 static void
3659 radv_initialise_ds_surface(struct radv_device *device,
3660                            struct radv_ds_buffer_info *ds,
3661                            struct radv_image_view *iview)
3662 {
3663         unsigned level = iview->base_mip;
3664         unsigned format, stencil_format;
3665         uint64_t va, s_offs, z_offs;
3666         bool stencil_only = false;
3667         memset(ds, 0, sizeof(*ds));
3668         switch (iview->image->vk_format) {
3669         case VK_FORMAT_D24_UNORM_S8_UINT:
3670         case VK_FORMAT_X8_D24_UNORM_PACK32:
3671                 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3672                 ds->offset_scale = 2.0f;
3673                 break;
3674         case VK_FORMAT_D16_UNORM:
3675         case VK_FORMAT_D16_UNORM_S8_UINT:
3676                 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3677                 ds->offset_scale = 4.0f;
3678                 break;
3679         case VK_FORMAT_D32_SFLOAT:
3680         case VK_FORMAT_D32_SFLOAT_S8_UINT:
3681                 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3682                         S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3683                 ds->offset_scale = 1.0f;
3684                 break;
3685         case VK_FORMAT_S8_UINT:
3686                 stencil_only = true;
3687                 break;
3688         default:
3689                 break;
3690         }
3691
3692         format = radv_translate_dbformat(iview->image->vk_format);
3693         stencil_format = iview->image->surface.has_stencil ?
3694                 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3695
3696         uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3697         ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3698                 S_028008_SLICE_MAX(max_slice);
3699
3700         ds->db_htile_data_base = 0;
3701         ds->db_htile_surface = 0;
3702
3703         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3704         s_offs = z_offs = va;
3705
3706         if (device->physical_device->rad_info.chip_class >= GFX9) {
3707                 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3708                 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3709
3710                 ds->db_z_info = S_028038_FORMAT(format) |
3711                         S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3712                         S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3713                         S_028038_MAXMIP(iview->image->info.levels - 1);
3714                 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3715                         S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3716
3717                 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3718                 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3719                 ds->db_depth_view |= S_028008_MIPID(level);
3720
3721                 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3722                         S_02801C_Y_MAX(iview->image->info.height - 1);
3723
3724                 if (radv_htile_enabled(iview->image, level)) {
3725                         ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3726
3727                         if (iview->image->tc_compatible_htile) {
3728                                 unsigned max_zplanes =
3729                                         radv_calc_decompress_on_z_planes(device, iview);
3730
3731                                 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
3732                                                  S_028038_ITERATE_FLUSH(1);
3733                                 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3734                         }
3735
3736                         if (!iview->image->surface.has_stencil)
3737                                 /* Use all of the htile_buffer for depth if there's no stencil. */
3738                                 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3739                         va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3740                                 iview->image->htile_offset;
3741                         ds->db_htile_data_base = va >> 8;
3742                         ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3743                                 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3744                                 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3745                 }
3746         } else {
3747                 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3748
3749                 if (stencil_only)
3750                         level_info = &iview->image->surface.u.legacy.stencil_level[level];
3751
3752                 z_offs += iview->image->surface.u.legacy.level[level].offset;
3753                 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3754
3755                 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
3756                 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3757                 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3758
3759                 if (iview->image->info.samples > 1)
3760                         ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3761
3762                 if (device->physical_device->rad_info.chip_class >= CIK) {
3763                         struct radeon_info *info = &device->physical_device->rad_info;
3764                         unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3765                         unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3766                         unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3767                         unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3768                         unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3769                         unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3770
3771                         if (stencil_only)
3772                                 tile_mode = stencil_tile_mode;
3773
3774                         ds->db_depth_info |=
3775                                 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3776                                 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3777                                 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3778                                 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3779                                 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3780                                 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3781                         ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3782                         ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3783                 } else {
3784                         unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3785                         ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3786                         tile_mode_index = si_tile_mode_index(iview->image, level, true);
3787                         ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3788                         if (stencil_only)
3789                                 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3790                 }
3791
3792                 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3793                         S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3794                 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3795
3796                 if (radv_htile_enabled(iview->image, level)) {
3797                         ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3798
3799                         if (!iview->image->surface.has_stencil &&
3800                             !iview->image->tc_compatible_htile)
3801                                 /* Use all of the htile_buffer for depth if there's no stencil. */
3802                                 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3803
3804                         va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3805                                 iview->image->htile_offset;
3806                         ds->db_htile_data_base = va >> 8;
3807                         ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3808
3809                         if (iview->image->tc_compatible_htile) {
3810                                 unsigned max_zplanes =
3811                                         radv_calc_decompress_on_z_planes(device, iview);
3812
3813                                 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3814                                 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
3815                         }
3816                 }
3817         }
3818
3819         ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3820         ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3821 }
3822
3823 VkResult radv_CreateFramebuffer(
3824         VkDevice                                    _device,
3825         const VkFramebufferCreateInfo*              pCreateInfo,
3826         const VkAllocationCallbacks*                pAllocator,
3827         VkFramebuffer*                              pFramebuffer)
3828 {
3829         RADV_FROM_HANDLE(radv_device, device, _device);
3830         struct radv_framebuffer *framebuffer;
3831
3832         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3833
3834         size_t size = sizeof(*framebuffer) +
3835                 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3836         framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3837                                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3838         if (framebuffer == NULL)
3839                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3840
3841         framebuffer->attachment_count = pCreateInfo->attachmentCount;
3842         framebuffer->width = pCreateInfo->width;
3843         framebuffer->height = pCreateInfo->height;
3844         framebuffer->layers = pCreateInfo->layers;
3845         for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3846                 VkImageView _iview = pCreateInfo->pAttachments[i];
3847                 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3848                 framebuffer->attachments[i].attachment = iview;
3849                 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3850                         radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3851                 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3852                         radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3853                 }
3854                 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3855                 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3856                 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
3857         }
3858
3859         *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3860         return VK_SUCCESS;
3861 }
3862
3863 void radv_DestroyFramebuffer(
3864         VkDevice                                    _device,
3865         VkFramebuffer                               _fb,
3866         const VkAllocationCallbacks*                pAllocator)
3867 {
3868         RADV_FROM_HANDLE(radv_device, device, _device);
3869         RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3870
3871         if (!fb)
3872                 return;
3873         vk_free2(&device->alloc, pAllocator, fb);
3874 }
3875
3876 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3877 {
3878         switch (address_mode) {
3879         case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3880                 return V_008F30_SQ_TEX_WRAP;
3881         case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3882                 return V_008F30_SQ_TEX_MIRROR;
3883         case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3884                 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3885         case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3886                 return V_008F30_SQ_TEX_CLAMP_BORDER;
3887         case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3888                 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3889         default:
3890                 unreachable("illegal tex wrap mode");
3891                 break;
3892         }
3893 }
3894
3895 static unsigned
3896 radv_tex_compare(VkCompareOp op)
3897 {
3898         switch (op) {
3899         case VK_COMPARE_OP_NEVER:
3900                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3901         case VK_COMPARE_OP_LESS:
3902                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3903         case VK_COMPARE_OP_EQUAL:
3904                 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3905         case VK_COMPARE_OP_LESS_OR_EQUAL:
3906                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3907         case VK_COMPARE_OP_GREATER:
3908                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3909         case VK_COMPARE_OP_NOT_EQUAL:
3910                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3911         case VK_COMPARE_OP_GREATER_OR_EQUAL:
3912                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3913         case VK_COMPARE_OP_ALWAYS:
3914                 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3915         default:
3916                 unreachable("illegal compare mode");
3917                 break;
3918         }
3919 }
3920
3921 static unsigned
3922 radv_tex_filter(VkFilter filter, unsigned max_ansio)
3923 {
3924         switch (filter) {
3925         case VK_FILTER_NEAREST:
3926                 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3927                         V_008F38_SQ_TEX_XY_FILTER_POINT);
3928         case VK_FILTER_LINEAR:
3929                 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3930                         V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3931         case VK_FILTER_CUBIC_IMG:
3932         default:
3933                 fprintf(stderr, "illegal texture filter");
3934                 return 0;
3935         }
3936 }
3937
3938 static unsigned
3939 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3940 {
3941         switch (mode) {
3942         case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3943                 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3944         case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3945                 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3946         default:
3947                 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3948         }
3949 }
3950
3951 static unsigned
3952 radv_tex_bordercolor(VkBorderColor bcolor)
3953 {
3954         switch (bcolor) {
3955         case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3956         case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3957                 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3958         case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3959         case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3960                 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3961         case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3962         case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3963                 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3964         default:
3965                 break;
3966         }
3967         return 0;
3968 }
3969
/*
 * Reduce an anisotropy sample count to a level in the range 0..4:
 *   filter <  2 -> 0
 *   filter <  4 -> 1
 *   filter <  8 -> 2
 *   filter < 16 -> 3
 *   otherwise   -> 4
 */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	unsigned level = 0;

	/* Step through the thresholds 2, 4, 8, 16 and count how many are met. */
	for (unsigned threshold = 2; level < 4 && filter >= threshold; threshold <<= 1)
		level++;

	return level;
}
3983
3984 static unsigned
3985 radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
3986 {
3987         switch (mode) {
3988         case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
3989                 return SQ_IMG_FILTER_MODE_BLEND;
3990         case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
3991                 return SQ_IMG_FILTER_MODE_MIN;
3992         case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
3993                 return SQ_IMG_FILTER_MODE_MAX;
3994         default:
3995                 break;
3996         }
3997         return 0;
3998 }
3999
4000 static void
4001 radv_init_sampler(struct radv_device *device,
4002                   struct radv_sampler *sampler,
4003                   const VkSamplerCreateInfo *pCreateInfo)
4004 {
4005         uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
4006                                         (uint32_t) pCreateInfo->maxAnisotropy : 0;
4007         uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
4008         bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
4009         unsigned filter_mode = SQ_IMG_FILTER_MODE_BLEND;
4010
4011         const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
4012                 vk_find_struct_const(pCreateInfo->pNext,
4013                                      SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT);
4014         if (sampler_reduction)
4015                 filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
4016
4017         sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
4018                              S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
4019                              S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
4020                              S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
4021                              S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
4022                              S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
4023                              S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
4024                              S_008F30_ANISO_BIAS(max_aniso_ratio) |
4025                              S_008F30_DISABLE_CUBE_WRAP(0) |
4026                              S_008F30_COMPAT_MODE(is_vi) |
4027                              S_008F30_FILTER_MODE(filter_mode));
4028         sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
4029                              S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
4030                              S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
4031         sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
4032                              S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
4033                              S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
4034                              S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
4035                              S_008F38_MIP_POINT_PRECLAMP(0) |
4036                              S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
4037                              S_008F38_FILTER_PREC_FIX(1) |
4038                              S_008F38_ANISO_OVERRIDE(is_vi));
4039         sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
4040                              S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
4041 }
4042
4043 VkResult radv_CreateSampler(
4044         VkDevice                                    _device,
4045         const VkSamplerCreateInfo*                  pCreateInfo,
4046         const VkAllocationCallbacks*                pAllocator,
4047         VkSampler*                                  pSampler)
4048 {
4049         RADV_FROM_HANDLE(radv_device, device, _device);
4050         struct radv_sampler *sampler;
4051
4052         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
4053
4054         sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
4055                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4056         if (!sampler)
4057                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
4058
4059         radv_init_sampler(device, sampler, pCreateInfo);
4060         *pSampler = radv_sampler_to_handle(sampler);
4061
4062         return VK_SUCCESS;
4063 }
4064
4065 void radv_DestroySampler(
4066         VkDevice                                    _device,
4067         VkSampler                                   _sampler,
4068         const VkAllocationCallbacks*                pAllocator)
4069 {
4070         RADV_FROM_HANDLE(radv_device, device, _device);
4071         RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
4072
4073         if (!sampler)
4074                 return;
4075         vk_free2(&device->alloc, pAllocator, sampler);
4076 }
4077
4078 /* vk_icd.h does not declare this function, so we declare it here to
4079  * suppress Wmissing-prototypes.
4080  */
4081 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4082 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
4083
4084 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4085 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
4086 {
4087         /* For the full details on loader interface versioning, see
4088         * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
4089         * What follows is a condensed summary, to help you navigate the large and
4090         * confusing official doc.
4091         *
4092         *   - Loader interface v0 is incompatible with later versions. We don't
4093         *     support it.
4094         *
4095         *   - In loader interface v1:
4096         *       - The first ICD entrypoint called by the loader is
4097         *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
4098         *         entrypoint.
4099         *       - The ICD must statically expose no other Vulkan symbol unless it is
4100         *         linked with -Bsymbolic.
4101         *       - Each dispatchable Vulkan handle created by the ICD must be
4102         *         a pointer to a struct whose first member is VK_LOADER_DATA. The
4103         *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
4104         *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
4105         *         vkDestroySurfaceKHR(). The ICD must be capable of working with
4106         *         such loader-managed surfaces.
4107         *
4108         *    - Loader interface v2 differs from v1 in:
4109         *       - The first ICD entrypoint called by the loader is
4110         *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4111         *         statically expose this entrypoint.
4112         *
4113         *    - Loader interface v3 differs from v2 in:
4114         *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
4115         *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
4116         *          because the loader no longer does so.
4117         */
4118         *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
4119         return VK_SUCCESS;
4120 }
4121
4122 VkResult radv_GetMemoryFdKHR(VkDevice _device,
4123                              const VkMemoryGetFdInfoKHR *pGetFdInfo,
4124                              int *pFD)
4125 {
4126         RADV_FROM_HANDLE(radv_device, device, _device);
4127         RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
4128
4129         assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4130
4131         /* At the moment, we support only the below handle types. */
4132         assert(pGetFdInfo->handleType ==
4133                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4134                pGetFdInfo->handleType ==
4135                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4136
4137         bool ret = radv_get_memory_fd(device, memory, pFD);
4138         if (ret == false)
4139                 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
4140         return VK_SUCCESS;
4141 }
4142
4143 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
4144                                        VkExternalMemoryHandleTypeFlagBitsKHR handleType,
4145                                        int fd,
4146                                        VkMemoryFdPropertiesKHR *pMemoryFdProperties)
4147 {
4148    switch (handleType) {
4149    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4150       pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
4151       return VK_SUCCESS;
4152
4153    default:
4154       /* The valid usage section for this function says:
4155        *
4156        *    "handleType must not be one of the handle types defined as
4157        *    opaque."
4158        *
4159        * So opaque handle types fall into the default "unsupported" case.
4160        */
4161       return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4162    }
4163 }
4164
4165 static VkResult radv_import_opaque_fd(struct radv_device *device,
4166                                       int fd,
4167                                       uint32_t *syncobj)
4168 {
4169         uint32_t syncobj_handle = 0;
4170         int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
4171         if (ret != 0)
4172                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4173
4174         if (*syncobj)
4175                 device->ws->destroy_syncobj(device->ws, *syncobj);
4176
4177         *syncobj = syncobj_handle;
4178         close(fd);
4179
4180         return VK_SUCCESS;
4181 }
4182
4183 static VkResult radv_import_sync_fd(struct radv_device *device,
4184                                     int fd,
4185                                     uint32_t *syncobj)
4186 {
4187         /* If we create a syncobj we do it locally so that if we have an error, we don't
4188          * leave a syncobj in an undetermined state in the fence. */
4189         uint32_t syncobj_handle =  *syncobj;
4190         if (!syncobj_handle) {
4191                 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
4192                 if (ret) {
4193                         return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4194                 }
4195         }
4196
4197         if (fd == -1) {
4198                 device->ws->signal_syncobj(device->ws, syncobj_handle);
4199         } else {
4200                 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
4201         if (ret != 0)
4202                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4203         }
4204
4205         *syncobj = syncobj_handle;
4206         if (fd != -1)
4207                 close(fd);
4208
4209         return VK_SUCCESS;
4210 }
4211
4212 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
4213                                    const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
4214 {
4215         RADV_FROM_HANDLE(radv_device, device, _device);
4216         RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
4217         uint32_t *syncobj_dst = NULL;
4218
4219         if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
4220                 syncobj_dst = &sem->temp_syncobj;
4221         } else {
4222                 syncobj_dst = &sem->syncobj;
4223         }
4224
4225         switch(pImportSemaphoreFdInfo->handleType) {
4226                 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4227                         return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4228                 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4229                         return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4230                 default:
4231                         unreachable("Unhandled semaphore handle type");
4232         }
4233 }
4234
4235 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
4236                                 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
4237                                 int *pFd)
4238 {
4239         RADV_FROM_HANDLE(radv_device, device, _device);
4240         RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
4241         int ret;
4242         uint32_t syncobj_handle;
4243
4244         if (sem->temp_syncobj)
4245                 syncobj_handle = sem->temp_syncobj;
4246         else
4247                 syncobj_handle = sem->syncobj;
4248
4249         switch(pGetFdInfo->handleType) {
4250         case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4251                 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4252                 break;
4253         case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4254                 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4255                 if (!ret) {
4256                         if (sem->temp_syncobj) {
4257                                 close (sem->temp_syncobj);
4258                                 sem->temp_syncobj = 0;
4259                         } else {
4260                                 device->ws->reset_syncobj(device->ws, syncobj_handle);
4261                         }
4262                 }
4263                 break;
4264         default:
4265                 unreachable("Unhandled semaphore handle type");
4266         }
4267
4268         if (ret)
4269                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4270         return VK_SUCCESS;
4271 }
4272
4273 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
4274         VkPhysicalDevice                            physicalDevice,
4275         const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
4276         VkExternalSemaphorePropertiesKHR*           pExternalSemaphoreProperties)
4277 {
4278         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4279
4280         /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
4281         if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4282             (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || 
4283              pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4284                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4285                 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4286                 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4287                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4288         } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
4289                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4290                 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4291                 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4292                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4293         } else {
4294                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
4295                 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
4296                 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
4297         }
4298 }
4299
4300 VkResult radv_ImportFenceFdKHR(VkDevice _device,
4301                                    const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
4302 {
4303         RADV_FROM_HANDLE(radv_device, device, _device);
4304         RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
4305         uint32_t *syncobj_dst = NULL;
4306
4307
4308         if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
4309                 syncobj_dst = &fence->temp_syncobj;
4310         } else {
4311                 syncobj_dst = &fence->syncobj;
4312         }
4313
4314         switch(pImportFenceFdInfo->handleType) {
4315                 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4316                         return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4317                 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4318                         return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4319                 default:
4320                         unreachable("Unhandled fence handle type");
4321         }
4322 }
4323
4324 VkResult radv_GetFenceFdKHR(VkDevice _device,
4325                                 const VkFenceGetFdInfoKHR *pGetFdInfo,
4326                                 int *pFd)
4327 {
4328         RADV_FROM_HANDLE(radv_device, device, _device);
4329         RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
4330         int ret;
4331         uint32_t syncobj_handle;
4332
4333         if (fence->temp_syncobj)
4334                 syncobj_handle = fence->temp_syncobj;
4335         else
4336                 syncobj_handle = fence->syncobj;
4337
4338         switch(pGetFdInfo->handleType) {
4339         case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4340                 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4341                 break;
4342         case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4343                 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4344                 if (!ret) {
4345                         if (fence->temp_syncobj) {
4346                                 close (fence->temp_syncobj);
4347                                 fence->temp_syncobj = 0;
4348                         } else {
4349                                 device->ws->reset_syncobj(device->ws, syncobj_handle);
4350                         }
4351                 }
4352                 break;
4353         default:
4354                 unreachable("Unhandled fence handle type");
4355         }
4356
4357         if (ret)
4358                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4359         return VK_SUCCESS;
4360 }
4361
4362 void radv_GetPhysicalDeviceExternalFenceProperties(
4363         VkPhysicalDevice                            physicalDevice,
4364         const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
4365         VkExternalFencePropertiesKHR*           pExternalFenceProperties)
4366 {
4367         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4368
4369         if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4370             (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || 
4371              pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4372                 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4373                 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4374                 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
4375                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4376         } else {
4377                 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
4378                 pExternalFenceProperties->compatibleHandleTypes = 0;
4379                 pExternalFenceProperties->externalFenceFeatures = 0;
4380         }
4381 }
4382
4383 VkResult
4384 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
4385                                  const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
4386                                  const VkAllocationCallbacks* pAllocator,
4387                                  VkDebugReportCallbackEXT* pCallback)
4388 {
4389         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4390         return vk_create_debug_report_callback(&instance->debug_report_callbacks,
4391                                                pCreateInfo, pAllocator, &instance->alloc,
4392                                                pCallback);
4393 }
4394
4395 void
4396 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
4397                                   VkDebugReportCallbackEXT _callback,
4398                                   const VkAllocationCallbacks* pAllocator)
4399 {
4400         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4401         vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
4402                                          _callback, pAllocator, &instance->alloc);
4403 }
4404
4405 void
4406 radv_DebugReportMessageEXT(VkInstance _instance,
4407                           VkDebugReportFlagsEXT flags,
4408                           VkDebugReportObjectTypeEXT objectType,
4409                           uint64_t object,
4410                           size_t location,
4411                           int32_t messageCode,
4412                           const char* pLayerPrefix,
4413                           const char* pMessage)
4414 {
4415         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4416         vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
4417                         object, location, messageCode, pLayerPrefix, pMessage);
4418 }
4419
4420 void
4421 radv_GetDeviceGroupPeerMemoryFeatures(
4422     VkDevice                                    device,
4423     uint32_t                                    heapIndex,
4424     uint32_t                                    localDeviceIndex,
4425     uint32_t                                    remoteDeviceIndex,
4426     VkPeerMemoryFeatureFlags*                   pPeerMemoryFeatures)
4427 {
4428         assert(localDeviceIndex == remoteDeviceIndex);
4429
4430         *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
4431                                VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
4432                                VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
4433                                VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
4434 }