OSDN Git Service

radv: Track enabled extensions.
[android-x86/external-mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_shader.h"
35 #include "radv_cs.h"
36 #include "util/disk_cache.h"
37 #include "util/strtod.h"
38 #include "vk_util.h"
39 #include <xf86drm.h>
40 #include <amdgpu.h>
41 #include <amdgpu_drm.h>
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "gfx9d.h"
47 #include "util/debug.h"
48
49 static int
50 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
51 {
52         uint32_t mesa_timestamp, llvm_timestamp;
53         uint16_t f = family;
54         memset(uuid, 0, VK_UUID_SIZE);
55         if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
56             !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
57                 return -1;
58
59         memcpy(uuid, &mesa_timestamp, 4);
60         memcpy((char*)uuid + 4, &llvm_timestamp, 4);
61         memcpy((char*)uuid + 8, &f, 2);
62         snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
63         return 0;
64 }
65
/* Fill 'uuid' (VK_UUID_SIZE bytes) with the driver UUID computed by the
 * shared AMD common code.  Takes no device info, so the value only
 * depends on the driver build.
 */
static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}
71
/* Fill 'uuid' (VK_UUID_SIZE bytes) with a UUID derived from the GPU
 * described by 'info', via the shared AMD common code.
 */
static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}
77
78 static void
79 radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
80 {
81         const char *chip_string;
82         char llvm_string[32] = {};
83
84         switch (family) {
85         case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
86         case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
87         case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
88         case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
89         case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
90         case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
91         case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
92         case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
93         case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
94         case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
95         case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
96         case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
97         case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
98         case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
99         case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
100         case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
101         case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
102         case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
103         case CHIP_VEGA10: chip_string = "AMD RADV VEGA"; break;
104         case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
105         default: chip_string = "AMD RADV unknown"; break;
106         }
107
108         if (HAVE_LLVM > 0) {
109                 snprintf(llvm_string, sizeof(llvm_string),
110                          " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
111                          HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
112         }
113
114         snprintf(name, name_len, "%s%s", chip_string, llvm_string);
115 }
116
/* Build the VkPhysicalDeviceMemoryProperties heap and type tables.
 *
 * Heaps (each emitted only when non-empty): CPU-invisible VRAM
 * (total minus visible), CPU-visible VRAM, and GART.  Memory types are
 * emitted in a fixed order — VRAM, GTT write-combined, visible VRAM,
 * GTT cached — and mem_type_indices[] records which RADV_MEM_TYPE_*
 * each VkMemoryType slot corresponds to.
 */
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	/* The CPU-visible window can never exceed total VRAM. */
	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
					  device->rad_info.vram_vis_size);

	/* -1 means "heap not present"; used below to skip its types. */
	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (device->rad_info.vram_size - visible_vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = device->rad_info.vram_size - visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = 0,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	/* Device-local, not host-visible. */
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	/* Host-visible write-combined GTT. */
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	/* Device-local AND host-visible (the CPU-visible VRAM window). */
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	/* Host-cached GTT, for CPU reads. */
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}
185
186 static void
187 radv_handle_env_var_force_family(struct radv_physical_device *device)
188 {
189         const char *family = getenv("RADV_FORCE_FAMILY");
190         unsigned i;
191
192         if (!family)
193                 return;
194
195         for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
196                 if (!strcmp(family, ac_get_llvm_processor_name(i))) {
197                         /* Override family and chip_class. */
198                         device->rad_info.family = i;
199
200                         if (i >= CHIP_VEGA10)
201                                 device->rad_info.chip_class = GFX9;
202                         else if (i >= CHIP_TONGA)
203                                 device->rad_info.chip_class = VI;
204                         else if (i >= CHIP_BONAIRE)
205                                 device->rad_info.chip_class = CIK;
206                         else
207                                 device->rad_info.chip_class = SI;
208
209                         return;
210                 }
211         }
212
213         fprintf(stderr, "radv: Unknown family: %s\n", family);
214         exit(1);
215 }
216
217 static VkResult
218 radv_physical_device_init(struct radv_physical_device *device,
219                           struct radv_instance *instance,
220                           drmDevicePtr drm_device)
221 {
222         const char *path = drm_device->nodes[DRM_NODE_RENDER];
223         VkResult result;
224         drmVersionPtr version;
225         int fd;
226
227         fd = open(path, O_RDWR | O_CLOEXEC);
228         if (fd < 0)
229                 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
230
231         version = drmGetVersion(fd);
232         if (!version) {
233                 close(fd);
234                 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
235                                  "failed to get version %s: %m", path);
236         }
237
238         if (strcmp(version->name, "amdgpu")) {
239                 drmFreeVersion(version);
240                 close(fd);
241                 return VK_ERROR_INCOMPATIBLE_DRIVER;
242         }
243         drmFreeVersion(version);
244
245         device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
246         device->instance = instance;
247         assert(strlen(path) < ARRAY_SIZE(device->path));
248         strncpy(device->path, path, ARRAY_SIZE(device->path));
249
250         device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
251                                                instance->perftest_flags);
252         if (!device->ws) {
253                 result = VK_ERROR_INCOMPATIBLE_DRIVER;
254                 goto fail;
255         }
256
257         device->local_fd = fd;
258         device->ws->query_info(device->ws, &device->rad_info);
259
260         radv_handle_env_var_force_family(device);
261
262         radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
263
264         if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
265                 device->ws->destroy(device->ws);
266                 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
267                                    "cannot generate UUID");
268                 goto fail;
269         }
270
271         /* These flags affect shader compilation. */
272         uint64_t shader_env_flags =
273                 (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
274                 (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
275
276         /* The gpu id is already embeded in the uuid so we just pass "radv"
277          * when creating the cache.
278          */
279         char buf[VK_UUID_SIZE * 2 + 1];
280         disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
281         device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
282
283         fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
284
285         radv_get_driver_uuid(&device->device_uuid);
286         radv_get_device_uuid(&device->rad_info, &device->device_uuid);
287
288         if (device->rad_info.family == CHIP_STONEY ||
289             device->rad_info.chip_class >= GFX9) {
290                 device->has_rbplus = true;
291                 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
292         }
293
294         /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs
295          * on SI.
296          */
297         device->has_clear_state = device->rad_info.chip_class >= CIK;
298
299         device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;
300
301         /* Vega10/Raven need a special workaround for a hardware bug. */
302         device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
303                                   device->rad_info.family == CHIP_RAVEN;
304
305         radv_physical_device_init_mem_types(device);
306         radv_fill_device_extension_table(device, &device->supported_extensions);
307
308         result = radv_init_wsi(device);
309         if (result != VK_SUCCESS) {
310                 device->ws->destroy(device->ws);
311                 goto fail;
312         }
313
314         return VK_SUCCESS;
315
316 fail:
317         close(fd);
318         return result;
319 }
320
/* Tear down a radv_physical_device: WSI first (it may still use the
 * winsys), then the winsys, the disk cache, and finally the DRM fd.
 */
static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
}
329
330 static void *
331 default_alloc_func(void *pUserData, size_t size, size_t align,
332                    VkSystemAllocationScope allocationScope)
333 {
334         return malloc(size);
335 }
336
337 static void *
338 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
339                      size_t align, VkSystemAllocationScope allocationScope)
340 {
341         return realloc(pOriginal, size);
342 }
343
/* Fallback VkAllocationCallbacks free: plain free; pUserData is ignored. */
static void
default_free_func(void *pUserData, void *pMemory)
{
	(void)pUserData;
	free(pMemory);
}
349
/* Allocator used when the application passes no VkAllocationCallbacks;
 * wraps plain malloc/realloc/free.
 */
static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};
356
/* Maps RADV_DEBUG=... tokens to RADV_DEBUG_* flags.  Indexed by flag
 * bit in radv_get_debug_option_name(), so entry order matters.
 * NULL-terminated for parse_debug_string().
 */
static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{NULL, 0}
};
376
/* Return the RADV_DEBUG token name for debug flag bit 'id'.
 * 'id' must be below the number of real entries (the trailing NULL
 * sentinel is excluded by the "- 1").
 */
const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}
383
/* Maps RADV_PERFTEST=... tokens to RADV_PERFTEST_* flags.  Indexed by
 * flag bit in radv_get_perftest_option_name(), so entry order matters.
 * NULL-terminated for parse_debug_string().
 */
static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"binning", RADV_PERFTEST_BINNING},
	{NULL, 0}
};
391
392 const char *
393 radv_get_perftest_option_name(int id)
394 {
395         assert(id < ARRAY_SIZE(radv_debug_options) - 1);
396         return radv_perftest_options[id].string;
397 }
398
399 static void
400 radv_handle_per_app_options(struct radv_instance *instance,
401                             const VkApplicationInfo *info)
402 {
403         const char *name = info ? info->pApplicationName : NULL;
404
405         if (!name)
406                 return;
407
408         if (!strcmp(name, "Talos - Linux - 32bit") ||
409             !strcmp(name, "Talos - Linux - 64bit")) {
410                 /* Force enable LLVM sisched for Talos because it looks safe
411                  * and it gives few more FPS.
412                  */
413                 instance->perftest_flags |= RADV_PERFTEST_SISCHED;
414         }
415 }
416
417 static int radv_get_instance_extension_index(const char *name)
418 {
419         for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
420                 if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
421                         return i;
422         }
423         return -1;
424 }
425
426
/* vkCreateInstance: validate the requested API version and extension
 * list, allocate the radv_instance, and initialize its allocator,
 * debug-report machinery, RADV_DEBUG / RADV_PERFTEST flags and per-app
 * workarounds.  On any failure the instance memory is freed and an
 * error VkResult returned.
 */
VkResult radv_CreateInstance(
	const VkInstanceCreateInfo*                 pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkInstance*                                 pInstance)
{
	struct radv_instance *instance;
	VkResult result;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	/* apiVersion == 0 (or no VkApplicationInfo) means "any", which we
	 * treat as 1.0.0.
	 */
	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_MAKE_VERSION(1, 0, 0);
	}

	/* Only Vulkan 1.0.x clients are supported. */
	if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
	    client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "Client requested version %d.%d.%d",
				 VK_VERSION_MAJOR(client_version),
				 VK_VERSION_MINOR(client_version),
				 VK_VERSION_PATCH(client_version));
	}

	/* vk_zalloc2 zeroes the struct, so enabled_extensions starts all-false. */
	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	/* -1 marks "not enumerated yet" for radv_EnumeratePhysicalDevices(). */
	instance->physicalDeviceCount = -1;

	/* Reject unknown or unsupported extensions; record the enabled ones. */
	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		int index = radv_get_instance_extension_index(ext_name);

		if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
			vk_free2(&default_alloc, pAllocator, instance);
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
		}

		instance->enabled_extensions.extensions[index] = true;
	}

	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
	if (result != VK_SUCCESS) {
		vk_free2(&default_alloc, pAllocator, instance);
		return vk_error(result);
	}

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
						   radv_perftest_options);

	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

	if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
		/* Disable sisched when the user requests it, this is mostly
		 * useful when the driver force-enable sisched for the given
		 * application.
		 */
		instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
	}

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}
511
/* vkDestroyInstance: finish all enumerated physical devices, then tear
 * down locale/debug-report state and free the instance with its own
 * allocator (pAllocator is unused here because instance->alloc already
 * captured it at creation).
 */
void radv_DestroyInstance(
	VkInstance                                  _instance,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	/* The spec allows destroying VK_NULL_HANDLE. */
	if (!instance)
		return;

	/* physicalDeviceCount is -1 if enumeration never ran; the loop then
	 * does nothing.
	 */
	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_free(&instance->alloc, instance);
}
533
/* Scan up to 8 DRM devices for AMD PCI GPUs with a render node and
 * initialize a radv_physical_device for each.
 *
 * Note: 'result' holds the outcome of the *last* candidate tried;
 * VK_ERROR_INCOMPATIBLE_DRIVER (also the no-device default) is treated
 * as non-fatal by the caller, which relies on physicalDeviceCount
 * instead.  Any other error aborts the scan.
 */
static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices ? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
	if (max_devices < 1)
		return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		/* Only AMD PCI devices exposing a render node qualify. */
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance,
							   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	/* Freed on every path, including early break. */
	drmFreeDevices(devices, max_devices);

	return result;
}
567
/* vkEnumeratePhysicalDevices: lazily enumerate devices on first call,
 * then either report the count (pPhysicalDevices == NULL) or fill in up
 * to *pPhysicalDeviceCount handles.  Returns VK_INCOMPLETE when the
 * caller's array was too small, per the Vulkan spec.
 */
VkResult radv_EnumeratePhysicalDevices(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceCount,
	VkPhysicalDevice*                           pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	/* physicalDeviceCount < 0 means enumeration has not run yet.
	 * VK_ERROR_INCOMPATIBLE_DRIVER just means "no usable device" and is
	 * not propagated.
	 */
	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
								     : VK_SUCCESS;
}
594
/* vkGetPhysicalDeviceFeatures: report the fixed set of core Vulkan 1.0
 * features radv supports.  The table is device-independent; the memset
 * guarantees any padding in the output struct is zeroed before the
 * full-struct assignment.
 */
void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess                       = true,
		.fullDrawIndexUint32                      = true,
		.imageCubeArray                           = true,
		.independentBlend                         = true,
		.geometryShader                           = true,
		.tessellationShader                       = true,
		.sampleRateShading                        = true,
		.dualSrcBlend                             = true,
		.logicOp                                  = true,
		.multiDrawIndirect                        = true,
		.drawIndirectFirstInstance                = true,
		.depthClamp                               = true,
		.depthBiasClamp                           = true,
		.fillModeNonSolid                         = true,
		.depthBounds                              = true,
		.wideLines                                = true,
		.largePoints                              = true,
		.alphaToOne                               = true,
		.multiViewport                            = true,
		.samplerAnisotropy                        = true,
		.textureCompressionETC2                   = false,
		.textureCompressionASTC_LDR               = false,
		.textureCompressionBC                     = true,
		.occlusionQueryPrecise                    = true,
		.pipelineStatisticsQuery                  = true,
		.vertexPipelineStoresAndAtomics           = true,
		.fragmentStoresAndAtomics                 = true,
		.shaderTessellationAndGeometryPointSize   = true,
		.shaderImageGatherExtended                = true,
		.shaderStorageImageExtendedFormats        = true,
		.shaderStorageImageMultisample            = false,
		.shaderUniformBufferArrayDynamicIndexing  = true,
		.shaderSampledImageArrayDynamicIndexing   = true,
		.shaderStorageBufferArrayDynamicIndexing  = true,
		.shaderStorageImageArrayDynamicIndexing   = true,
		.shaderStorageImageReadWithoutFormat      = true,
		.shaderStorageImageWriteWithoutFormat     = true,
		.shaderClipDistance                       = true,
		.shaderCullDistance                       = true,
		.shaderFloat64                            = true,
		.shaderInt64                              = true,
		.shaderInt16                              = false,
		.sparseBinding                            = true,
		.variableMultisampleRate                  = true,
		.inheritedQueries                         = true,
	};
}
649
650 void radv_GetPhysicalDeviceFeatures2KHR(
651         VkPhysicalDevice                            physicalDevice,
652         VkPhysicalDeviceFeatures2KHR               *pFeatures)
653 {
654         vk_foreach_struct(ext, pFeatures->pNext) {
655                 switch (ext->sType) {
656                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
657                         VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
658                         features->variablePointersStorageBuffer = true;
659                         features->variablePointers = false;
660                         break;
661                 }
662                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
663                         VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext;
664                         features->multiview = true;
665                         features->multiviewGeometryShader = true;
666                         features->multiviewTessellationShader = true;
667                         break;
668                 }
669                 default:
670                         break;
671                 }
672         }
673         return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
674 }
675
676 void radv_GetPhysicalDeviceProperties(
677         VkPhysicalDevice                            physicalDevice,
678         VkPhysicalDeviceProperties*                 pProperties)
679 {
680         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
681         VkSampleCountFlags sample_counts = 0xf;
682
683         /* make sure that the entire descriptor set is addressable with a signed
684          * 32-bit int. So the sum of all limits scaled by descriptor size has to
685          * be at most 2 GiB. the combined image & samples object count as one of
686          * both. This limit is for the pipeline layout, not for the set layout, but
687          * there is no set limit, so we just set a pipeline limit. I don't think
688          * any app is going to hit this soon. */
689         size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
690                   (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
691                    32 /* storage buffer, 32 due to potential space wasted on alignment */ +
692                    32 /* sampler, largest when combined with image */ +
693                    64 /* sampled image */ +
694                    64 /* storage image */);
695
696         VkPhysicalDeviceLimits limits = {
697                 .maxImageDimension1D                      = (1 << 14),
698                 .maxImageDimension2D                      = (1 << 14),
699                 .maxImageDimension3D                      = (1 << 11),
700                 .maxImageDimensionCube                    = (1 << 14),
701                 .maxImageArrayLayers                      = (1 << 11),
702                 .maxTexelBufferElements                   = 128 * 1024 * 1024,
703                 .maxUniformBufferRange                    = UINT32_MAX,
704                 .maxStorageBufferRange                    = UINT32_MAX,
705                 .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
706                 .maxMemoryAllocationCount                 = UINT32_MAX,
707                 .maxSamplerAllocationCount                = 64 * 1024,
708                 .bufferImageGranularity                   = 64, /* A cache line */
709                 .sparseAddressSpaceSize                   = 0xffffffffu, /* buffer max size */
710                 .maxBoundDescriptorSets                   = MAX_SETS,
711                 .maxPerStageDescriptorSamplers            = max_descriptor_set_size,
712                 .maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
713                 .maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
714                 .maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
715                 .maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
716                 .maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
717                 .maxPerStageResources                     = max_descriptor_set_size,
718                 .maxDescriptorSetSamplers                 = max_descriptor_set_size,
719                 .maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
720                 .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
721                 .maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
722                 .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
723                 .maxDescriptorSetSampledImages            = max_descriptor_set_size,
724                 .maxDescriptorSetStorageImages            = max_descriptor_set_size,
725                 .maxDescriptorSetInputAttachments         = max_descriptor_set_size,
726                 .maxVertexInputAttributes                 = 32,
727                 .maxVertexInputBindings                   = 32,
728                 .maxVertexInputAttributeOffset            = 2047,
729                 .maxVertexInputBindingStride              = 2048,
730                 .maxVertexOutputComponents                = 128,
731                 .maxTessellationGenerationLevel           = 64,
732                 .maxTessellationPatchSize                 = 32,
733                 .maxTessellationControlPerVertexInputComponents = 128,
734                 .maxTessellationControlPerVertexOutputComponents = 128,
735                 .maxTessellationControlPerPatchOutputComponents = 120,
736                 .maxTessellationControlTotalOutputComponents = 4096,
737                 .maxTessellationEvaluationInputComponents = 128,
738                 .maxTessellationEvaluationOutputComponents = 128,
739                 .maxGeometryShaderInvocations             = 127,
740                 .maxGeometryInputComponents               = 64,
741                 .maxGeometryOutputComponents              = 128,
742                 .maxGeometryOutputVertices                = 256,
743                 .maxGeometryTotalOutputComponents         = 1024,
744                 .maxFragmentInputComponents               = 128,
745                 .maxFragmentOutputAttachments             = 8,
746                 .maxFragmentDualSrcAttachments            = 1,
747                 .maxFragmentCombinedOutputResources       = 8,
748                 .maxComputeSharedMemorySize               = 32768,
749                 .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
750                 .maxComputeWorkGroupInvocations           = 2048,
751                 .maxComputeWorkGroupSize = {
752                         2048,
753                         2048,
754                         2048
755                 },
756                 .subPixelPrecisionBits                    = 4 /* FIXME */,
757                 .subTexelPrecisionBits                    = 4 /* FIXME */,
758                 .mipmapPrecisionBits                      = 4 /* FIXME */,
759                 .maxDrawIndexedIndexValue                 = UINT32_MAX,
760                 .maxDrawIndirectCount                     = UINT32_MAX,
761                 .maxSamplerLodBias                        = 16,
762                 .maxSamplerAnisotropy                     = 16,
763                 .maxViewports                             = MAX_VIEWPORTS,
764                 .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
765                 .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
766                 .viewportSubPixelBits                     = 13, /* We take a float? */
767                 .minMemoryMapAlignment                    = 4096, /* A page */
768                 .minTexelBufferOffsetAlignment            = 1,
769                 .minUniformBufferOffsetAlignment          = 4,
770                 .minStorageBufferOffsetAlignment          = 4,
771                 .minTexelOffset                           = -32,
772                 .maxTexelOffset                           = 31,
773                 .minTexelGatherOffset                     = -32,
774                 .maxTexelGatherOffset                     = 31,
775                 .minInterpolationOffset                   = -2,
776                 .maxInterpolationOffset                   = 2,
777                 .subPixelInterpolationOffsetBits          = 8,
778                 .maxFramebufferWidth                      = (1 << 14),
779                 .maxFramebufferHeight                     = (1 << 14),
780                 .maxFramebufferLayers                     = (1 << 10),
781                 .framebufferColorSampleCounts             = sample_counts,
782                 .framebufferDepthSampleCounts             = sample_counts,
783                 .framebufferStencilSampleCounts           = sample_counts,
784                 .framebufferNoAttachmentsSampleCounts     = sample_counts,
785                 .maxColorAttachments                      = MAX_RTS,
786                 .sampledImageColorSampleCounts            = sample_counts,
787                 .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
788                 .sampledImageDepthSampleCounts            = sample_counts,
789                 .sampledImageStencilSampleCounts          = sample_counts,
790                 .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
791                 .maxSampleMaskWords                       = 1,
792                 .timestampComputeAndGraphics              = true,
793                 .timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
794                 .maxClipDistances                         = 8,
795                 .maxCullDistances                         = 8,
796                 .maxCombinedClipAndCullDistances          = 8,
797                 .discreteQueuePriorities                  = 1,
798                 .pointSizeRange                           = { 0.125, 255.875 },
799                 .lineWidthRange                           = { 0.0, 7.9921875 },
800                 .pointSizeGranularity                     = (1.0 / 8.0),
801                 .lineWidthGranularity                     = (1.0 / 128.0),
802                 .strictLines                              = false, /* FINISHME */
803                 .standardSampleLocations                  = true,
804                 .optimalBufferCopyOffsetAlignment         = 128,
805                 .optimalBufferCopyRowPitchAlignment       = 128,
806                 .nonCoherentAtomSize                      = 64,
807         };
808
809         *pProperties = (VkPhysicalDeviceProperties) {
810                 .apiVersion = radv_physical_device_api_version(pdevice),
811                 .driverVersion = vk_get_driver_version(),
812                 .vendorID = ATI_VENDOR_ID,
813                 .deviceID = pdevice->rad_info.pci_id,
814                 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
815                 .limits = limits,
816                 .sparseProperties = {0},
817         };
818
819         strcpy(pProperties->deviceName, pdevice->name);
820         memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
821 }
822
823 void radv_GetPhysicalDeviceProperties2KHR(
824         VkPhysicalDevice                            physicalDevice,
825         VkPhysicalDeviceProperties2KHR             *pProperties)
826 {
827         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
828         radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
829
830         vk_foreach_struct(ext, pProperties->pNext) {
831                 switch (ext->sType) {
832                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
833                         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
834                                 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
835                         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
836                         break;
837                 }
838                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
839                         VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
840                         memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
841                         memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
842                         properties->deviceLUIDValid = false;
843                         break;
844                 }
845                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
846                         VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext;
847                         properties->maxMultiviewViewCount = MAX_VIEWS;
848                         properties->maxMultiviewInstanceIndex = INT_MAX;
849                         break;
850                 }
851                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
852                         VkPhysicalDevicePointClippingPropertiesKHR *properties =
853                             (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
854                         properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
855                         break;
856                 }
857                 case  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
858                         VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
859                             (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
860                         properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
861                         break;
862                 }
863                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
864                         VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
865                             (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
866                         properties->minImportedHostPointerAlignment = 4096;
867                         break;
868                 }
869                 default:
870                         break;
871                 }
872         }
873 }
874
875 static void radv_get_physical_device_queue_family_properties(
876         struct radv_physical_device*                pdevice,
877         uint32_t*                                   pCount,
878         VkQueueFamilyProperties**                    pQueueFamilyProperties)
879 {
880         int num_queue_families = 1;
881         int idx;
882         if (pdevice->rad_info.num_compute_rings > 0 &&
883             pdevice->rad_info.chip_class >= CIK &&
884             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
885                 num_queue_families++;
886
887         if (pQueueFamilyProperties == NULL) {
888                 *pCount = num_queue_families;
889                 return;
890         }
891
892         if (!*pCount)
893                 return;
894
895         idx = 0;
896         if (*pCount >= 1) {
897                 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
898                         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
899                                       VK_QUEUE_COMPUTE_BIT |
900                                       VK_QUEUE_TRANSFER_BIT |
901                                       VK_QUEUE_SPARSE_BINDING_BIT,
902                         .queueCount = 1,
903                         .timestampValidBits = 64,
904                         .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
905                 };
906                 idx++;
907         }
908
909         if (pdevice->rad_info.num_compute_rings > 0 &&
910             pdevice->rad_info.chip_class >= CIK &&
911             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
912                 if (*pCount > idx) {
913                         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
914                                 .queueFlags = VK_QUEUE_COMPUTE_BIT |
915                                               VK_QUEUE_TRANSFER_BIT |
916                                               VK_QUEUE_SPARSE_BINDING_BIT,
917                                 .queueCount = pdevice->rad_info.num_compute_rings,
918                                 .timestampValidBits = 64,
919                                 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
920                         };
921                         idx++;
922                 }
923         }
924         *pCount = idx;
925 }
926
927 void radv_GetPhysicalDeviceQueueFamilyProperties(
928         VkPhysicalDevice                            physicalDevice,
929         uint32_t*                                   pCount,
930         VkQueueFamilyProperties*                    pQueueFamilyProperties)
931 {
932         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
933         if (!pQueueFamilyProperties) {
934                 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
935                 return;
936         }
937         VkQueueFamilyProperties *properties[] = {
938                 pQueueFamilyProperties + 0,
939                 pQueueFamilyProperties + 1,
940                 pQueueFamilyProperties + 2,
941         };
942         radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
943         assert(*pCount <= 3);
944 }
945
946 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
947         VkPhysicalDevice                            physicalDevice,
948         uint32_t*                                   pCount,
949         VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
950 {
951         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
952         if (!pQueueFamilyProperties) {
953                 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
954                 return;
955         }
956         VkQueueFamilyProperties *properties[] = {
957                 &pQueueFamilyProperties[0].queueFamilyProperties,
958                 &pQueueFamilyProperties[1].queueFamilyProperties,
959                 &pQueueFamilyProperties[2].queueFamilyProperties,
960         };
961         radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
962         assert(*pCount <= 3);
963 }
964
965 void radv_GetPhysicalDeviceMemoryProperties(
966         VkPhysicalDevice                            physicalDevice,
967         VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
968 {
969         RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
970
971         *pMemoryProperties = physical_device->memory_properties;
972 }
973
974 void radv_GetPhysicalDeviceMemoryProperties2KHR(
975         VkPhysicalDevice                            physicalDevice,
976         VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
977 {
978         return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
979                                                       &pMemoryProperties->memoryProperties);
980 }
981
982 VkResult radv_GetMemoryHostPointerPropertiesEXT(
983         VkDevice                                    _device,
984         VkExternalMemoryHandleTypeFlagBitsKHR       handleType,
985         const void                                 *pHostPointer,
986         VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
987 {
988         RADV_FROM_HANDLE(radv_device, device, _device);
989
990         switch (handleType)
991         {
992         case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
993                 const struct radv_physical_device *physical_device = device->physical_device;
994                 uint32_t memoryTypeBits = 0;
995                 for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
996                         if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
997                                 memoryTypeBits = (1 << i);
998                                 break;
999                         }
1000                 }
1001                 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
1002                 return VK_SUCCESS;
1003         }
1004         default:
1005                 return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
1006         }
1007 }
1008
1009 static enum radeon_ctx_priority
1010 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
1011 {
1012         /* Default to MEDIUM when a specific global priority isn't requested */
1013         if (!pObj)
1014                 return RADEON_CTX_PRIORITY_MEDIUM;
1015
1016         switch(pObj->globalPriority) {
1017         case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
1018                 return RADEON_CTX_PRIORITY_REALTIME;
1019         case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
1020                 return RADEON_CTX_PRIORITY_HIGH;
1021         case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
1022                 return RADEON_CTX_PRIORITY_MEDIUM;
1023         case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
1024                 return RADEON_CTX_PRIORITY_LOW;
1025         default:
1026                 unreachable("Illegal global priority value");
1027                 return RADEON_CTX_PRIORITY_INVALID;
1028         }
1029 }
1030
1031 static int
1032 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1033                 uint32_t queue_family_index, int idx,
1034                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
1035 {
1036         queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1037         queue->device = device;
1038         queue->queue_family_index = queue_family_index;
1039         queue->queue_idx = idx;
1040         queue->priority = radv_get_queue_global_priority(global_priority);
1041
1042         queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
1043         if (!queue->hw_ctx)
1044                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1045
1046         return VK_SUCCESS;
1047 }
1048
1049 static void
1050 radv_queue_finish(struct radv_queue *queue)
1051 {
1052         if (queue->hw_ctx)
1053                 queue->device->ws->ctx_destroy(queue->hw_ctx);
1054
1055         if (queue->initial_full_flush_preamble_cs)
1056                 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1057         if (queue->initial_preamble_cs)
1058                 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1059         if (queue->continue_preamble_cs)
1060                 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1061         if (queue->descriptor_bo)
1062                 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1063         if (queue->scratch_bo)
1064                 queue->device->ws->buffer_destroy(queue->scratch_bo);
1065         if (queue->esgs_ring_bo)
1066                 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1067         if (queue->gsvs_ring_bo)
1068                 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1069         if (queue->tess_factor_ring_bo)
1070                 queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
1071         if (queue->tess_offchip_ring_bo)
1072                 queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
1073         if (queue->compute_scratch_bo)
1074                 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1075 }
1076
1077 static void
1078 radv_device_init_gs_info(struct radv_device *device)
1079 {
1080         switch (device->physical_device->rad_info.family) {
1081         case CHIP_OLAND:
1082         case CHIP_HAINAN:
1083         case CHIP_KAVERI:
1084         case CHIP_KABINI:
1085         case CHIP_MULLINS:
1086         case CHIP_ICELAND:
1087         case CHIP_CARRIZO:
1088         case CHIP_STONEY:
1089                 device->gs_table_depth = 16;
1090                 return;
1091         case CHIP_TAHITI:
1092         case CHIP_PITCAIRN:
1093         case CHIP_VERDE:
1094         case CHIP_BONAIRE:
1095         case CHIP_HAWAII:
1096         case CHIP_TONGA:
1097         case CHIP_FIJI:
1098         case CHIP_POLARIS10:
1099         case CHIP_POLARIS11:
1100         case CHIP_POLARIS12:
1101         case CHIP_VEGA10:
1102         case CHIP_RAVEN:
1103                 device->gs_table_depth = 32;
1104                 return;
1105         default:
1106                 unreachable("unknown GPU");
1107         }
1108 }
1109
1110 static int radv_get_device_extension_index(const char *name)
1111 {
1112         for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
1113                 if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
1114                         return i;
1115         }
1116         return -1;
1117 }
1118
1119 VkResult radv_CreateDevice(
1120         VkPhysicalDevice                            physicalDevice,
1121         const VkDeviceCreateInfo*                   pCreateInfo,
1122         const VkAllocationCallbacks*                pAllocator,
1123         VkDevice*                                   pDevice)
1124 {
1125         RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1126         VkResult result;
1127         struct radv_device *device;
1128
1129         bool keep_shader_info = false;
1130
1131         /* Check enabled features */
1132         if (pCreateInfo->pEnabledFeatures) {
1133                 VkPhysicalDeviceFeatures supported_features;
1134                 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1135                 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1136                 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1137                 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1138                 for (uint32_t i = 0; i < num_features; i++) {
1139                         if (enabled_feature[i] && !supported_feature[i])
1140                                 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
1141                 }
1142         }
1143
1144         device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1145                             sizeof(*device), 8,
1146                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1147         if (!device)
1148                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1149
1150         device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1151         device->instance = physical_device->instance;
1152         device->physical_device = physical_device;
1153
1154         device->ws = physical_device->ws;
1155         if (pAllocator)
1156                 device->alloc = *pAllocator;
1157         else
1158                 device->alloc = physical_device->instance->alloc;
1159
1160         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1161                 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1162                 int index = radv_get_device_extension_index(ext_name);
1163                 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1164                         vk_free(&device->alloc, device);
1165                         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1166                 }
1167
1168                 device->enabled_extensions.extensions[index] = true;
1169         }
1170
1171         keep_shader_info = device->enabled_extensions.AMD_shader_info;
1172
1173         mtx_init(&device->shader_slab_mutex, mtx_plain);
1174         list_inithead(&device->shader_slabs);
1175
1176         for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1177                 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1178                 uint32_t qfi = queue_create->queueFamilyIndex;
1179                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1180                         vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1181
1182                 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1183
1184                 device->queues[qfi] = vk_alloc(&device->alloc,
1185                                                queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1186                 if (!device->queues[qfi]) {
1187                         result = VK_ERROR_OUT_OF_HOST_MEMORY;
1188                         goto fail;
1189                 }
1190
1191                 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1192
1193                 device->queue_count[qfi] = queue_create->queueCount;
1194
1195                 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1196                         result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
1197                         if (result != VK_SUCCESS)
1198                                 goto fail;
1199                 }
1200         }
1201
1202         device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1203                               (device->instance->perftest_flags & RADV_PERFTEST_BINNING);
1204
1205         /* Disabled and not implemented for now. */
1206         device->dfsm_allowed = device->pbb_allowed && false;
1207
1208 #ifdef ANDROID
1209         device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1210 #endif
1211
1212         device->llvm_supports_spill = true;
1213
1214         /* The maximum number of scratch waves. Scratch space isn't divided
1215          * evenly between CUs. The number is only a function of the number of CUs.
1216          * We can decrease the constant to decrease the scratch buffer size.
1217          *
1218          * sctx->scratch_waves must be >= the maximum posible size of
1219          * 1 threadgroup, so that the hw doesn't hang from being unable
1220          * to start any.
1221          *
1222          * The recommended value is 4 per CU at most. Higher numbers don't
1223          * bring much benefit, but they still occupy chip resources (think
1224          * async compute). I've seen ~2% performance difference between 4 and 32.
1225          */
1226         uint32_t max_threads_per_block = 2048;
1227         device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1228                                      max_threads_per_block / 64);
1229
1230         device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
1231                                      S_00B800_FORCE_START_AT_000(1);
1232
1233         if (device->physical_device->rad_info.chip_class >= CIK) {
1234                 /* If the KMD allows it (there is a KMD hw register for it),
1235                  * allow launching waves out-of-order.
1236                  */
1237                 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
1238         }
1239
1240         radv_device_init_gs_info(device);
1241
1242         device->tess_offchip_block_dw_size =
1243                 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1244         device->has_distributed_tess =
1245                 device->physical_device->rad_info.chip_class >= VI &&
1246                 device->physical_device->rad_info.max_se >= 2;
1247
1248         if (getenv("RADV_TRACE_FILE")) {
1249                 keep_shader_info = true;
1250
1251                 if (!radv_init_trace(device))
1252                         goto fail;
1253         }
1254
1255         device->keep_shader_info = keep_shader_info;
1256
1257         result = radv_device_init_meta(device);
1258         if (result != VK_SUCCESS)
1259                 goto fail;
1260
1261         radv_device_init_msaa(device);
1262
1263         for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1264                 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1265                 switch (family) {
1266                 case RADV_QUEUE_GENERAL:
1267                         radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1268                         radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1269                         radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1270                         break;
1271                 case RADV_QUEUE_COMPUTE:
1272                         radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1273                         radeon_emit(device->empty_cs[family], 0);
1274                         break;
1275                 }
1276                 device->ws->cs_finalize(device->empty_cs[family]);
1277         }
1278
1279         if (device->physical_device->rad_info.chip_class >= CIK)
1280                 cik_create_gfx_config(device);
1281
1282         VkPipelineCacheCreateInfo ci;
1283         ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1284         ci.pNext = NULL;
1285         ci.flags = 0;
1286         ci.pInitialData = NULL;
1287         ci.initialDataSize = 0;
1288         VkPipelineCache pc;
1289         result = radv_CreatePipelineCache(radv_device_to_handle(device),
1290                                           &ci, NULL, &pc);
1291         if (result != VK_SUCCESS)
1292                 goto fail_meta;
1293
1294         device->mem_cache = radv_pipeline_cache_from_handle(pc);
1295
1296         *pDevice = radv_device_to_handle(device);
1297         return VK_SUCCESS;
1298
1299 fail_meta:
1300         radv_device_finish_meta(device);
1301 fail:
1302         if (device->trace_bo)
1303                 device->ws->buffer_destroy(device->trace_bo);
1304
1305         if (device->gfx_init)
1306                 device->ws->buffer_destroy(device->gfx_init);
1307
1308         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1309                 for (unsigned q = 0; q < device->queue_count[i]; q++)
1310                         radv_queue_finish(&device->queues[i][q]);
1311                 if (device->queue_count[i])
1312                         vk_free(&device->alloc, device->queues[i]);
1313         }
1314
1315         vk_free(&device->alloc, device);
1316         return result;
1317 }
1318
/* vkDestroyDevice: tear down everything radv_CreateDevice built, in
 * reverse-ish order: trace/gfx_init buffers, queues and their empty command
 * streams, meta state, the internal pipeline cache, the shader slabs, and
 * finally the device allocation itself.  NULL is a valid no-op input per
 * the Vulkan spec. */
void radv_DestroyDevice(
	VkDevice                                    _device,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (!device)
		return;

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
		if (device->empty_cs[i])
			device->ws->cs_destroy(device->empty_cs[i]);
	}
	radv_device_finish_meta(device);

	/* The internal cache holds pipelines created by meta, so destroy it
	 * after meta teardown. */
	VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

	radv_destroy_shader_slabs(device);

	vk_free(&device->alloc, device);
}
1351
1352 VkResult radv_EnumerateInstanceLayerProperties(
1353         uint32_t*                                   pPropertyCount,
1354         VkLayerProperties*                          pProperties)
1355 {
1356         if (pProperties == NULL) {
1357                 *pPropertyCount = 0;
1358                 return VK_SUCCESS;
1359         }
1360
1361         /* None supported at this time */
1362         return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1363 }
1364
1365 VkResult radv_EnumerateDeviceLayerProperties(
1366         VkPhysicalDevice                            physicalDevice,
1367         uint32_t*                                   pPropertyCount,
1368         VkLayerProperties*                          pProperties)
1369 {
1370         if (pProperties == NULL) {
1371                 *pPropertyCount = 0;
1372                 return VK_SUCCESS;
1373         }
1374
1375         /* None supported at this time */
1376         return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1377 }
1378
1379 void radv_GetDeviceQueue(
1380         VkDevice                                    _device,
1381         uint32_t                                    queueFamilyIndex,
1382         uint32_t                                    queueIndex,
1383         VkQueue*                                    pQueue)
1384 {
1385         RADV_FROM_HANDLE(radv_device, device, _device);
1386
1387         *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1388 }
1389
1390 static void
1391 fill_geom_tess_rings(struct radv_queue *queue,
1392                      uint32_t *map,
1393                      bool add_sample_positions,
1394                      uint32_t esgs_ring_size,
1395                      struct radeon_winsys_bo *esgs_ring_bo,
1396                      uint32_t gsvs_ring_size,
1397                      struct radeon_winsys_bo *gsvs_ring_bo,
1398                      uint32_t tess_factor_ring_size,
1399                      struct radeon_winsys_bo *tess_factor_ring_bo,
1400                      uint32_t tess_offchip_ring_size,
1401                      struct radeon_winsys_bo *tess_offchip_ring_bo)
1402 {
1403         uint64_t esgs_va = 0, gsvs_va = 0;
1404         uint64_t tess_factor_va = 0, tess_offchip_va = 0;
1405         uint32_t *desc = &map[4];
1406
1407         if (esgs_ring_bo)
1408                 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1409         if (gsvs_ring_bo)
1410                 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1411         if (tess_factor_ring_bo)
1412                 tess_factor_va = radv_buffer_get_va(tess_factor_ring_bo);
1413         if (tess_offchip_ring_bo)
1414                 tess_offchip_va = radv_buffer_get_va(tess_offchip_ring_bo);
1415
1416         /* stride 0, num records - size, add tid, swizzle, elsize4,
1417            index stride 64 */
1418         desc[0] = esgs_va;
1419         desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1420                 S_008F04_STRIDE(0) |
1421                 S_008F04_SWIZZLE_ENABLE(true);
1422         desc[2] = esgs_ring_size;
1423         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1424                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1425                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1426                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1427                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1428                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1429                 S_008F0C_ELEMENT_SIZE(1) |
1430                 S_008F0C_INDEX_STRIDE(3) |
1431                 S_008F0C_ADD_TID_ENABLE(true);
1432
1433         desc += 4;
1434         /* GS entry for ES->GS ring */
1435         /* stride 0, num records - size, elsize0,
1436            index stride 0 */
1437         desc[0] = esgs_va;
1438         desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1439                 S_008F04_STRIDE(0) |
1440                 S_008F04_SWIZZLE_ENABLE(false);
1441         desc[2] = esgs_ring_size;
1442         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1443                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1444                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1445                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1446                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1447                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1448                 S_008F0C_ELEMENT_SIZE(0) |
1449                 S_008F0C_INDEX_STRIDE(0) |
1450                 S_008F0C_ADD_TID_ENABLE(false);
1451
1452         desc += 4;
1453         /* VS entry for GS->VS ring */
1454         /* stride 0, num records - size, elsize0,
1455            index stride 0 */
1456         desc[0] = gsvs_va;
1457         desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1458                 S_008F04_STRIDE(0) |
1459                 S_008F04_SWIZZLE_ENABLE(false);
1460         desc[2] = gsvs_ring_size;
1461         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1462                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1463                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1464                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1465                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1466                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1467                 S_008F0C_ELEMENT_SIZE(0) |
1468                 S_008F0C_INDEX_STRIDE(0) |
1469                 S_008F0C_ADD_TID_ENABLE(false);
1470         desc += 4;
1471
1472         /* stride gsvs_itemsize, num records 64
1473            elsize 4, index stride 16 */
1474         /* shader will patch stride and desc[2] */
1475         desc[0] = gsvs_va;
1476         desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1477                 S_008F04_STRIDE(0) |
1478                 S_008F04_SWIZZLE_ENABLE(true);
1479         desc[2] = 0;
1480         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1481                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1482                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1483                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1484                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1485                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1486                 S_008F0C_ELEMENT_SIZE(1) |
1487                 S_008F0C_INDEX_STRIDE(1) |
1488                 S_008F0C_ADD_TID_ENABLE(true);
1489         desc += 4;
1490
1491         desc[0] = tess_factor_va;
1492         desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
1493                 S_008F04_STRIDE(0) |
1494                 S_008F04_SWIZZLE_ENABLE(false);
1495         desc[2] = tess_factor_ring_size;
1496         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1497                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1498                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1499                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1500                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1501                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1502                 S_008F0C_ELEMENT_SIZE(0) |
1503                 S_008F0C_INDEX_STRIDE(0) |
1504                 S_008F0C_ADD_TID_ENABLE(false);
1505         desc += 4;
1506
1507         desc[0] = tess_offchip_va;
1508         desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1509                 S_008F04_STRIDE(0) |
1510                 S_008F04_SWIZZLE_ENABLE(false);
1511         desc[2] = tess_offchip_ring_size;
1512         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1513                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1514                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1515                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1516                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1517                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1518                 S_008F0C_ELEMENT_SIZE(0) |
1519                 S_008F0C_INDEX_STRIDE(0) |
1520                 S_008F0C_ADD_TID_ENABLE(false);
1521         desc += 4;
1522
1523         /* add sample positions after all rings */
1524         memcpy(desc, queue->device->sample_locations_1x, 8);
1525         desc += 2;
1526         memcpy(desc, queue->device->sample_locations_2x, 16);
1527         desc += 4;
1528         memcpy(desc, queue->device->sample_locations_4x, 32);
1529         desc += 8;
1530         memcpy(desc, queue->device->sample_locations_8x, 64);
1531         desc += 16;
1532         memcpy(desc, queue->device->sample_locations_16x, 128);
1533 }
1534
1535 static unsigned
1536 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1537 {
1538         bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1539                 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1540                 device->physical_device->rad_info.family != CHIP_STONEY;
1541         unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1542         unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1543                 device->physical_device->rad_info.max_se;
1544         unsigned offchip_granularity;
1545         unsigned hs_offchip_param;
1546         switch (device->tess_offchip_block_dw_size) {
1547         default:
1548                 assert(0);
1549                 /* fall through */
1550         case 8192:
1551                 offchip_granularity = V_03093C_X_8K_DWORDS;
1552                 break;
1553         case 4096:
1554                 offchip_granularity = V_03093C_X_4K_DWORDS;
1555                 break;
1556         }
1557
1558         switch (device->physical_device->rad_info.chip_class) {
1559         case SI:
1560                 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1561                 break;
1562         case CIK:
1563         case VI:
1564         case GFX9:
1565         default:
1566                 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1567                 break;
1568         }
1569
1570         *max_offchip_buffers_p = max_offchip_buffers;
1571         if (device->physical_device->rad_info.chip_class >= CIK) {
1572                 if (device->physical_device->rad_info.chip_class >= VI)
1573                         --max_offchip_buffers;
1574                 hs_offchip_param =
1575                         S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1576                         S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1577         } else {
1578                 hs_offchip_param =
1579                         S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1580         }
1581         return hs_offchip_param;
1582 }
1583
1584 static VkResult
1585 radv_get_preamble_cs(struct radv_queue *queue,
1586                      uint32_t scratch_size,
1587                      uint32_t compute_scratch_size,
1588                      uint32_t esgs_ring_size,
1589                      uint32_t gsvs_ring_size,
1590                      bool needs_tess_rings,
1591                      bool needs_sample_positions,
1592                      struct radeon_winsys_cs **initial_full_flush_preamble_cs,
1593                      struct radeon_winsys_cs **initial_preamble_cs,
1594                      struct radeon_winsys_cs **continue_preamble_cs)
1595 {
1596         struct radeon_winsys_bo *scratch_bo = NULL;
1597         struct radeon_winsys_bo *descriptor_bo = NULL;
1598         struct radeon_winsys_bo *compute_scratch_bo = NULL;
1599         struct radeon_winsys_bo *esgs_ring_bo = NULL;
1600         struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1601         struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1602         struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1603         struct radeon_winsys_cs *dest_cs[3] = {0};
1604         bool add_tess_rings = false, add_sample_positions = false;
1605         unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1606         unsigned max_offchip_buffers;
1607         unsigned hs_offchip_param = 0;
1608         uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
1609         if (!queue->has_tess_rings) {
1610                 if (needs_tess_rings)
1611                         add_tess_rings = true;
1612         }
1613         if (!queue->has_sample_positions) {
1614                 if (needs_sample_positions)
1615                         add_sample_positions = true;
1616         }
1617         tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1618         hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1619                                                      &max_offchip_buffers);
1620         tess_offchip_ring_size = max_offchip_buffers *
1621                 queue->device->tess_offchip_block_dw_size * 4;
1622
1623         if (scratch_size <= queue->scratch_size &&
1624             compute_scratch_size <= queue->compute_scratch_size &&
1625             esgs_ring_size <= queue->esgs_ring_size &&
1626             gsvs_ring_size <= queue->gsvs_ring_size &&
1627             !add_tess_rings && !add_sample_positions &&
1628             queue->initial_preamble_cs) {
1629                 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1630                 *initial_preamble_cs = queue->initial_preamble_cs;
1631                 *continue_preamble_cs = queue->continue_preamble_cs;
1632                 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1633                         *continue_preamble_cs = NULL;
1634                 return VK_SUCCESS;
1635         }
1636
1637         if (scratch_size > queue->scratch_size) {
1638                 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1639                                                               scratch_size,
1640                                                               4096,
1641                                                               RADEON_DOMAIN_VRAM,
1642                                                               ring_bo_flags);
1643                 if (!scratch_bo)
1644                         goto fail;
1645         } else
1646                 scratch_bo = queue->scratch_bo;
1647
1648         if (compute_scratch_size > queue->compute_scratch_size) {
1649                 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1650                                                                       compute_scratch_size,
1651                                                                       4096,
1652                                                                       RADEON_DOMAIN_VRAM,
1653                                                                       ring_bo_flags);
1654                 if (!compute_scratch_bo)
1655                         goto fail;
1656
1657         } else
1658                 compute_scratch_bo = queue->compute_scratch_bo;
1659
1660         if (esgs_ring_size > queue->esgs_ring_size) {
1661                 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1662                                                                 esgs_ring_size,
1663                                                                 4096,
1664                                                                 RADEON_DOMAIN_VRAM,
1665                                                                 ring_bo_flags);
1666                 if (!esgs_ring_bo)
1667                         goto fail;
1668         } else {
1669                 esgs_ring_bo = queue->esgs_ring_bo;
1670                 esgs_ring_size = queue->esgs_ring_size;
1671         }
1672
1673         if (gsvs_ring_size > queue->gsvs_ring_size) {
1674                 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1675                                                                 gsvs_ring_size,
1676                                                                 4096,
1677                                                                 RADEON_DOMAIN_VRAM,
1678                                                                 ring_bo_flags);
1679                 if (!gsvs_ring_bo)
1680                         goto fail;
1681         } else {
1682                 gsvs_ring_bo = queue->gsvs_ring_bo;
1683                 gsvs_ring_size = queue->gsvs_ring_size;
1684         }
1685
1686         if (add_tess_rings) {
1687                 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1688                                                                        tess_factor_ring_size,
1689                                                                        256,
1690                                                                        RADEON_DOMAIN_VRAM,
1691                                                                        ring_bo_flags);
1692                 if (!tess_factor_ring_bo)
1693                         goto fail;
1694                 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1695                                                                        tess_offchip_ring_size,
1696                                                                        256,
1697                                                                        RADEON_DOMAIN_VRAM,
1698                                                                         ring_bo_flags);
1699                 if (!tess_offchip_ring_bo)
1700                         goto fail;
1701         } else {
1702                 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1703                 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1704         }
1705
1706         if (scratch_bo != queue->scratch_bo ||
1707             esgs_ring_bo != queue->esgs_ring_bo ||
1708             gsvs_ring_bo != queue->gsvs_ring_bo ||
1709             tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1710             tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1711                 uint32_t size = 0;
1712                 if (gsvs_ring_bo || esgs_ring_bo ||
1713                     tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
1714                         size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1715                         if (add_sample_positions)
1716                                 size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
1717                 }
1718                 else if (scratch_bo)
1719                         size = 8; /* 2 dword */
1720
1721                 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1722                                                                  size,
1723                                                                  4096,
1724                                                                  RADEON_DOMAIN_VRAM,
1725                                                                  RADEON_FLAG_CPU_ACCESS |
1726                                                                  RADEON_FLAG_NO_INTERPROCESS_SHARING |
1727                                                                  RADEON_FLAG_READ_ONLY);
1728                 if (!descriptor_bo)
1729                         goto fail;
1730         } else
1731                 descriptor_bo = queue->descriptor_bo;
1732
1733         for(int i = 0; i < 3; ++i) {
1734                 struct radeon_winsys_cs *cs = NULL;
1735                 cs = queue->device->ws->cs_create(queue->device->ws,
1736                                                   queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1737                 if (!cs)
1738                         goto fail;
1739
1740                 dest_cs[i] = cs;
1741
1742                 if (scratch_bo)
1743                         radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
1744
1745                 if (esgs_ring_bo)
1746                         radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
1747
1748                 if (gsvs_ring_bo)
1749                         radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
1750
1751                 if (tess_factor_ring_bo)
1752                         radv_cs_add_buffer(queue->device->ws, cs, tess_factor_ring_bo, 8);
1753
1754                 if (tess_offchip_ring_bo)
1755                         radv_cs_add_buffer(queue->device->ws, cs, tess_offchip_ring_bo, 8);
1756
1757                 if (descriptor_bo)
1758                         radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
1759
1760                 if (descriptor_bo != queue->descriptor_bo) {
1761                         uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1762
1763                         if (scratch_bo) {
1764                                 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1765                                 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1766                                                  S_008F04_SWIZZLE_ENABLE(1);
1767                                 map[0] = scratch_va;
1768                                 map[1] = rsrc1;
1769                         }
1770
1771                         if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1772                             add_sample_positions)
1773                                 fill_geom_tess_rings(queue, map, add_sample_positions,
1774                                                      esgs_ring_size, esgs_ring_bo,
1775                                                      gsvs_ring_size, gsvs_ring_bo,
1776                                                      tess_factor_ring_size, tess_factor_ring_bo,
1777                                                      tess_offchip_ring_size, tess_offchip_ring_bo);
1778
1779                         queue->device->ws->buffer_unmap(descriptor_bo);
1780                 }
1781
1782                 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1783                         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1784                         radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1785                         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1786                         radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1787                 }
1788
1789                 if (esgs_ring_bo || gsvs_ring_bo) {
1790                         if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1791                                 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1792                                 radeon_emit(cs, esgs_ring_size >> 8);
1793                                 radeon_emit(cs, gsvs_ring_size >> 8);
1794                         } else {
1795                                 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1796                                 radeon_emit(cs, esgs_ring_size >> 8);
1797                                 radeon_emit(cs, gsvs_ring_size >> 8);
1798                         }
1799                 }
1800
1801                 if (tess_factor_ring_bo) {
1802                         uint64_t tf_va = radv_buffer_get_va(tess_factor_ring_bo);
1803                         if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1804                                 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1805                                                        S_030938_SIZE(tess_factor_ring_size / 4));
1806                                 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1807                                                        tf_va >> 8);
1808                                 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1809                                         radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1810                                                                tf_va >> 40);
1811                                 }
1812                                 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1813                         } else {
1814                                 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1815                                                       S_008988_SIZE(tess_factor_ring_size / 4));
1816                                 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1817                                                       tf_va >> 8);
1818                                 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1819                                                       hs_offchip_param);
1820                         }
1821                 }
1822
1823                 if (descriptor_bo) {
1824                         uint64_t va = radv_buffer_get_va(descriptor_bo);
1825                         if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1826                                 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1827                                                 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1828                                                 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
1829                                                 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
1830
1831                                 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1832                                         radeon_set_sh_reg_seq(cs, regs[i], 2);
1833                                         radeon_emit(cs, va);
1834                                         radeon_emit(cs, va >> 32);
1835                                 }
1836                         } else {
1837                                 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1838                                                 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1839                                                 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1840                                                 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1841                                                 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1842                                                 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1843
1844                                 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1845                                         radeon_set_sh_reg_seq(cs, regs[i], 2);
1846                                         radeon_emit(cs, va);
1847                                         radeon_emit(cs, va >> 32);
1848                                 }
1849                         }
1850                 }
1851
1852                 if (compute_scratch_bo) {
1853                         uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1854                         uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1855                                          S_008F04_SWIZZLE_ENABLE(1);
1856
1857                         radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
1858
1859                         radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1860                         radeon_emit(cs, scratch_va);
1861                         radeon_emit(cs, rsrc1);
1862                 }
1863
1864                 if (i == 0) {
1865                         si_cs_emit_cache_flush(cs,
1866                                                queue->device->physical_device->rad_info.chip_class,
1867                                                NULL, 0,
1868                                                queue->queue_family_index == RING_COMPUTE &&
1869                                                  queue->device->physical_device->rad_info.chip_class >= CIK,
1870                                                (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1871                                                RADV_CMD_FLAG_INV_ICACHE |
1872                                                RADV_CMD_FLAG_INV_SMEM_L1 |
1873                                                RADV_CMD_FLAG_INV_VMEM_L1 |
1874                                                RADV_CMD_FLAG_INV_GLOBAL_L2);
1875                 } else if (i == 1) {
1876                         si_cs_emit_cache_flush(cs,
1877                                                queue->device->physical_device->rad_info.chip_class,
1878                                                NULL, 0,
1879                                                queue->queue_family_index == RING_COMPUTE &&
1880                                                  queue->device->physical_device->rad_info.chip_class >= CIK,
1881                                                RADV_CMD_FLAG_INV_ICACHE |
1882                                                RADV_CMD_FLAG_INV_SMEM_L1 |
1883                                                RADV_CMD_FLAG_INV_VMEM_L1 |
1884                                                RADV_CMD_FLAG_INV_GLOBAL_L2);
1885                 }
1886
1887                 if (!queue->device->ws->cs_finalize(cs))
1888                         goto fail;
1889         }
1890
1891         if (queue->initial_full_flush_preamble_cs)
1892                         queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1893
1894         if (queue->initial_preamble_cs)
1895                         queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1896
1897         if (queue->continue_preamble_cs)
1898                         queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1899
1900         queue->initial_full_flush_preamble_cs = dest_cs[0];
1901         queue->initial_preamble_cs = dest_cs[1];
1902         queue->continue_preamble_cs = dest_cs[2];
1903
1904         if (scratch_bo != queue->scratch_bo) {
1905                 if (queue->scratch_bo)
1906                         queue->device->ws->buffer_destroy(queue->scratch_bo);
1907                 queue->scratch_bo = scratch_bo;
1908                 queue->scratch_size = scratch_size;
1909         }
1910
1911         if (compute_scratch_bo != queue->compute_scratch_bo) {
1912                 if (queue->compute_scratch_bo)
1913                         queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1914                 queue->compute_scratch_bo = compute_scratch_bo;
1915                 queue->compute_scratch_size = compute_scratch_size;
1916         }
1917
1918         if (esgs_ring_bo != queue->esgs_ring_bo) {
1919                 if (queue->esgs_ring_bo)
1920                         queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1921                 queue->esgs_ring_bo = esgs_ring_bo;
1922                 queue->esgs_ring_size = esgs_ring_size;
1923         }
1924
1925         if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1926                 if (queue->gsvs_ring_bo)
1927                         queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1928                 queue->gsvs_ring_bo = gsvs_ring_bo;
1929                 queue->gsvs_ring_size = gsvs_ring_size;
1930         }
1931
1932         if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1933                 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1934         }
1935
1936         if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1937                 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1938                 queue->has_tess_rings = true;
1939         }
1940
1941         if (descriptor_bo != queue->descriptor_bo) {
1942                 if (queue->descriptor_bo)
1943                         queue->device->ws->buffer_destroy(queue->descriptor_bo);
1944
1945                 queue->descriptor_bo = descriptor_bo;
1946         }
1947
1948         if (add_sample_positions)
1949                 queue->has_sample_positions = true;
1950
1951         *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1952         *initial_preamble_cs = queue->initial_preamble_cs;
1953         *continue_preamble_cs = queue->continue_preamble_cs;
1954         if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1955                         *continue_preamble_cs = NULL;
1956         return VK_SUCCESS;
1957 fail:
1958         for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1959                 if (dest_cs[i])
1960                         queue->device->ws->cs_destroy(dest_cs[i]);
1961         if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1962                 queue->device->ws->buffer_destroy(descriptor_bo);
1963         if (scratch_bo && scratch_bo != queue->scratch_bo)
1964                 queue->device->ws->buffer_destroy(scratch_bo);
1965         if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1966                 queue->device->ws->buffer_destroy(compute_scratch_bo);
1967         if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1968                 queue->device->ws->buffer_destroy(esgs_ring_bo);
1969         if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1970                 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1971         if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1972                 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1973         if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1974                 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1975         return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1976 }
1977
1978 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
1979                                       int num_sems,
1980                                       const VkSemaphore *sems,
1981                                       VkFence _fence,
1982                                       bool reset_temp)
1983 {
1984         int syncobj_idx = 0, sem_idx = 0;
1985
1986         if (num_sems == 0 && _fence == VK_NULL_HANDLE)
1987                 return VK_SUCCESS;
1988
1989         for (uint32_t i = 0; i < num_sems; i++) {
1990                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1991
1992                 if (sem->temp_syncobj || sem->syncobj)
1993                         counts->syncobj_count++;
1994                 else
1995                         counts->sem_count++;
1996         }
1997
1998         if (_fence != VK_NULL_HANDLE) {
1999                 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2000                 if (fence->temp_syncobj || fence->syncobj)
2001                         counts->syncobj_count++;
2002         }
2003
2004         if (counts->syncobj_count) {
2005                 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2006                 if (!counts->syncobj)
2007                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2008         }
2009
2010         if (counts->sem_count) {
2011                 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2012                 if (!counts->sem) {
2013                         free(counts->syncobj);
2014                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2015                 }
2016         }
2017
2018         for (uint32_t i = 0; i < num_sems; i++) {
2019                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2020
2021                 if (sem->temp_syncobj) {
2022                         counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2023                 }
2024                 else if (sem->syncobj)
2025                         counts->syncobj[syncobj_idx++] = sem->syncobj;
2026                 else {
2027                         assert(sem->sem);
2028                         counts->sem[sem_idx++] = sem->sem;
2029                 }
2030         }
2031
2032         if (_fence != VK_NULL_HANDLE) {
2033                 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2034                 if (fence->temp_syncobj)
2035                         counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2036                 else if (fence->syncobj)
2037                         counts->syncobj[syncobj_idx++] = fence->syncobj;
2038         }
2039
2040         return VK_SUCCESS;
2041 }
2042
2043 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2044 {
2045         free(sem_info->wait.syncobj);
2046         free(sem_info->wait.sem);
2047         free(sem_info->signal.syncobj);
2048         free(sem_info->signal.sem);
2049 }
2050
2051
2052 static void radv_free_temp_syncobjs(struct radv_device *device,
2053                                     int num_sems,
2054                                     const VkSemaphore *sems)
2055 {
2056         for (uint32_t i = 0; i < num_sems; i++) {
2057                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2058
2059                 if (sem->temp_syncobj) {
2060                         device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2061                         sem->temp_syncobj = 0;
2062                 }
2063         }
2064 }
2065
2066 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
2067                              int num_wait_sems,
2068                              const VkSemaphore *wait_sems,
2069                              int num_signal_sems,
2070                              const VkSemaphore *signal_sems,
2071                              VkFence fence)
2072 {
2073         VkResult ret;
2074         memset(sem_info, 0, sizeof(*sem_info));
2075
2076         ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
2077         if (ret)
2078                 return ret;
2079         ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, fence, false);
2080         if (ret)
2081                 radv_free_sem_info(sem_info);
2082
2083         /* caller can override these */
2084         sem_info->cs_emit_wait = true;
2085         sem_info->cs_emit_signal = true;
2086         return ret;
2087 }
2088
2089 /* Signals fence as soon as all the work currently put on queue is done. */
2090 static VkResult radv_signal_fence(struct radv_queue *queue,
2091                               struct radv_fence *fence)
2092 {
2093         int ret;
2094         VkResult result;
2095         struct radv_winsys_sem_info sem_info;
2096
2097         result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
2098                                      radv_fence_to_handle(fence));
2099         if (result != VK_SUCCESS)
2100                 return result;
2101
2102         ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2103                                            &queue->device->empty_cs[queue->queue_family_index],
2104                                            1, NULL, NULL, &sem_info,
2105                                            false, fence->fence);
2106         radv_free_sem_info(&sem_info);
2107
2108         /* TODO: find a better error */
2109         if (ret)
2110                 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2111
2112         return VK_SUCCESS;
2113 }
2114
2115 VkResult radv_QueueSubmit(
2116         VkQueue                                     _queue,
2117         uint32_t                                    submitCount,
2118         const VkSubmitInfo*                         pSubmits,
2119         VkFence                                     _fence)
2120 {
2121         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2122         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2123         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2124         struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2125         int ret;
2126         uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
2127         uint32_t scratch_size = 0;
2128         uint32_t compute_scratch_size = 0;
2129         uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2130         struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2131         VkResult result;
2132         bool fence_emitted = false;
2133         bool tess_rings_needed = false;
2134         bool sample_positions_needed = false;
2135
2136         /* Do this first so failing to allocate scratch buffers can't result in
2137          * partially executed submissions. */
2138         for (uint32_t i = 0; i < submitCount; i++) {
2139                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2140                         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2141                                          pSubmits[i].pCommandBuffers[j]);
2142
2143                         scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2144                         compute_scratch_size = MAX2(compute_scratch_size,
2145                                                     cmd_buffer->compute_scratch_size_needed);
2146                         esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2147                         gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2148                         tess_rings_needed |= cmd_buffer->tess_rings_needed;
2149                         sample_positions_needed |= cmd_buffer->sample_positions_needed;
2150                 }
2151         }
2152
2153         result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2154                                       esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2155                                       sample_positions_needed, &initial_flush_preamble_cs,
2156                                       &initial_preamble_cs, &continue_preamble_cs);
2157         if (result != VK_SUCCESS)
2158                 return result;
2159
2160         for (uint32_t i = 0; i < submitCount; i++) {
2161                 struct radeon_winsys_cs **cs_array;
2162                 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2163                 bool can_patch = true;
2164                 uint32_t advance;
2165                 struct radv_winsys_sem_info sem_info;
2166
2167                 result = radv_alloc_sem_info(&sem_info,
2168                                              pSubmits[i].waitSemaphoreCount,
2169                                              pSubmits[i].pWaitSemaphores,
2170                                              pSubmits[i].signalSemaphoreCount,
2171                                              pSubmits[i].pSignalSemaphores,
2172                                              _fence);
2173                 if (result != VK_SUCCESS)
2174                         return result;
2175
2176                 if (!pSubmits[i].commandBufferCount) {
2177                         if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2178                                 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2179                                                                    &queue->device->empty_cs[queue->queue_family_index],
2180                                                                    1, NULL, NULL,
2181                                                                    &sem_info,
2182                                                                    false, base_fence);
2183                                 if (ret) {
2184                                         radv_loge("failed to submit CS %d\n", i);
2185                                         abort();
2186                                 }
2187                                 fence_emitted = true;
2188                         }
2189                         radv_free_sem_info(&sem_info);
2190                         continue;
2191                 }
2192
2193                 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
2194                                                 (pSubmits[i].commandBufferCount));
2195
2196                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2197                         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2198                                          pSubmits[i].pCommandBuffers[j]);
2199                         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2200
2201                         cs_array[j] = cmd_buffer->cs;
2202                         if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2203                                 can_patch = false;
2204
2205                         cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2206                 }
2207
2208                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2209                         struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2210                         advance = MIN2(max_cs_submission,
2211                                        pSubmits[i].commandBufferCount - j);
2212
2213                         if (queue->device->trace_bo)
2214                                 *queue->device->trace_id_ptr = 0;
2215
2216                         sem_info.cs_emit_wait = j == 0;
2217                         sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2218
2219                         ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2220                                                         advance, initial_preamble, continue_preamble_cs,
2221                                                            &sem_info,
2222                                                         can_patch, base_fence);
2223
2224                         if (ret) {
2225                                 radv_loge("failed to submit CS %d\n", i);
2226                                 abort();
2227                         }
2228                         fence_emitted = true;
2229                         if (queue->device->trace_bo) {
2230                                 radv_check_gpu_hangs(queue, cs_array[j]);
2231                         }
2232                 }
2233
2234                 radv_free_temp_syncobjs(queue->device,
2235                                         pSubmits[i].waitSemaphoreCount,
2236                                         pSubmits[i].pWaitSemaphores);
2237                 radv_free_sem_info(&sem_info);
2238                 free(cs_array);
2239         }
2240
2241         if (fence) {
2242                 if (!fence_emitted) {
2243                         radv_signal_fence(queue, fence);
2244                 }
2245                 fence->submitted = true;
2246         }
2247
2248         return VK_SUCCESS;
2249 }
2250
2251 VkResult radv_QueueWaitIdle(
2252         VkQueue                                     _queue)
2253 {
2254         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2255
2256         queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2257                                          radv_queue_family_to_ring(queue->queue_family_index),
2258                                          queue->queue_idx);
2259         return VK_SUCCESS;
2260 }
2261
2262 VkResult radv_DeviceWaitIdle(
2263         VkDevice                                    _device)
2264 {
2265         RADV_FROM_HANDLE(radv_device, device, _device);
2266
2267         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2268                 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2269                         radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2270                 }
2271         }
2272         return VK_SUCCESS;
2273 }
2274
2275 VkResult radv_EnumerateInstanceExtensionProperties(
2276     const char*                                 pLayerName,
2277     uint32_t*                                   pPropertyCount,
2278     VkExtensionProperties*                      pProperties)
2279 {
2280         VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2281
2282         for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
2283                 if (radv_supported_instance_extensions.extensions[i]) {
2284                         vk_outarray_append(&out, prop) {
2285                                 *prop = radv_instance_extensions[i];
2286                         }
2287                 }
2288         }
2289
2290         return vk_outarray_status(&out);
2291 }
2292
2293 VkResult radv_EnumerateDeviceExtensionProperties(
2294     VkPhysicalDevice                            physicalDevice,
2295     const char*                                 pLayerName,
2296     uint32_t*                                   pPropertyCount,
2297     VkExtensionProperties*                      pProperties)
2298 {
2299         RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2300         VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2301
2302         for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
2303                 if (device->supported_extensions.extensions[i]) {
2304                         vk_outarray_append(&out, prop) {
2305                                 *prop = radv_device_extensions[i];
2306                         }
2307                 }
2308         }
2309
2310         return vk_outarray_status(&out);
2311 }
2312
/* Resolve an entrypoint by name.  The instance handle is unused: all
 * entrypoints come from a single static dispatch table. */
PFN_vkVoidFunction radv_GetInstanceProcAddr(
	VkInstance                                  instance,
	const char*                                 pName)
{
	return radv_lookup_entrypoint(pName);
}
2319
/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance                                  instance,
	const char*                                 pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance                                  instance,
	const char*                                 pName)
{
	/* Forward straight to the regular entrypoint resolver. */
	return radv_GetInstanceProcAddr(instance, pName);
}
2335
/* Resolve a device-level entrypoint by name.  The device handle is unused;
 * lookup goes through the same static table as instance entrypoints. */
PFN_vkVoidFunction radv_GetDeviceProcAddr(
	VkDevice                                    device,
	const char*                                 pName)
{
	return radv_lookup_entrypoint(pName);
}
2342
2343 bool radv_get_memory_fd(struct radv_device *device,
2344                         struct radv_device_memory *memory,
2345                         int *pFD)
2346 {
2347         struct radeon_bo_metadata metadata;
2348
2349         if (memory->image) {
2350                 radv_init_metadata(device, memory->image, &metadata);
2351                 device->ws->buffer_set_metadata(memory->bo, &metadata);
2352         }
2353
2354         return device->ws->buffer_get_fd(device->ws, memory->bo,
2355                                          pFD);
2356 }
2357
2358 static VkResult radv_alloc_memory(struct radv_device *device,
2359                                   const VkMemoryAllocateInfo*     pAllocateInfo,
2360                                   const VkAllocationCallbacks*    pAllocator,
2361                                   VkDeviceMemory*                 pMem)
2362 {
2363         struct radv_device_memory *mem;
2364         VkResult result;
2365         enum radeon_bo_domain domain;
2366         uint32_t flags = 0;
2367         enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2368
2369         assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2370
2371         if (pAllocateInfo->allocationSize == 0) {
2372                 /* Apparently, this is allowed */
2373                 *pMem = VK_NULL_HANDLE;
2374                 return VK_SUCCESS;
2375         }
2376
2377         const VkImportMemoryFdInfoKHR *import_info =
2378                 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2379         const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2380                 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2381         const VkExportMemoryAllocateInfoKHR *export_info =
2382                 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
2383         const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
2384                 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
2385
2386         const struct wsi_memory_allocate_info *wsi_info =
2387                 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2388
2389         mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2390                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2391         if (mem == NULL)
2392                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2393
2394         if (wsi_info && wsi_info->implicit_sync)
2395                 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2396
2397         if (dedicate_info) {
2398                 mem->image = radv_image_from_handle(dedicate_info->image);
2399                 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2400         } else {
2401                 mem->image = NULL;
2402                 mem->buffer = NULL;
2403         }
2404
2405         mem->user_ptr = NULL;
2406
2407         if (import_info) {
2408                 assert(import_info->handleType ==
2409                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2410                        import_info->handleType ==
2411                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2412                 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2413                                                      NULL, NULL);
2414                 if (!mem->bo) {
2415                         result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2416                         goto fail;
2417                 } else {
2418                         close(import_info->fd);
2419                         goto out_success;
2420                 }
2421         }
2422
2423         if (host_ptr_info) {
2424                 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
2425                 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
2426                 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
2427                                                       pAllocateInfo->allocationSize);
2428                 if (!mem->bo) {
2429                         result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2430                         goto fail;
2431                 } else {
2432                         mem->user_ptr = host_ptr_info->pHostPointer;
2433                         goto out_success;
2434                 }
2435         }
2436
2437         uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2438         if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2439             mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2440                 domain = RADEON_DOMAIN_GTT;
2441         else
2442                 domain = RADEON_DOMAIN_VRAM;
2443
2444         if (mem_type_index == RADV_MEM_TYPE_VRAM)
2445                 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2446         else
2447                 flags |= RADEON_FLAG_CPU_ACCESS;
2448
2449         if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2450                 flags |= RADEON_FLAG_GTT_WC;
2451
2452         if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
2453                 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2454
2455         mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2456                                                domain, flags);
2457
2458         if (!mem->bo) {
2459                 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2460                 goto fail;
2461         }
2462         mem->type_index = mem_type_index;
2463 out_success:
2464         *pMem = radv_device_memory_to_handle(mem);
2465
2466         return VK_SUCCESS;
2467
2468 fail:
2469         vk_free2(&device->alloc, pAllocator, mem);
2470
2471         return result;
2472 }
2473
2474 VkResult radv_AllocateMemory(
2475         VkDevice                                    _device,
2476         const VkMemoryAllocateInfo*                 pAllocateInfo,
2477         const VkAllocationCallbacks*                pAllocator,
2478         VkDeviceMemory*                             pMem)
2479 {
2480         RADV_FROM_HANDLE(radv_device, device, _device);
2481         return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
2482 }
2483
2484 void radv_FreeMemory(
2485         VkDevice                                    _device,
2486         VkDeviceMemory                              _mem,
2487         const VkAllocationCallbacks*                pAllocator)
2488 {
2489         RADV_FROM_HANDLE(radv_device, device, _device);
2490         RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2491
2492         if (mem == NULL)
2493                 return;
2494
2495         device->ws->buffer_destroy(mem->bo);
2496         mem->bo = NULL;
2497
2498         vk_free2(&device->alloc, pAllocator, mem);
2499 }
2500
2501 VkResult radv_MapMemory(
2502         VkDevice                                    _device,
2503         VkDeviceMemory                              _memory,
2504         VkDeviceSize                                offset,
2505         VkDeviceSize                                size,
2506         VkMemoryMapFlags                            flags,
2507         void**                                      ppData)
2508 {
2509         RADV_FROM_HANDLE(radv_device, device, _device);
2510         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2511
2512         if (mem == NULL) {
2513                 *ppData = NULL;
2514                 return VK_SUCCESS;
2515         }
2516
2517         if (mem->user_ptr)
2518                 *ppData = mem->user_ptr;
2519         else
2520                 *ppData = device->ws->buffer_map(mem->bo);
2521
2522         if (*ppData) {
2523                 *ppData += offset;
2524                 return VK_SUCCESS;
2525         }
2526
2527         return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
2528 }
2529
2530 void radv_UnmapMemory(
2531         VkDevice                                    _device,
2532         VkDeviceMemory                              _memory)
2533 {
2534         RADV_FROM_HANDLE(radv_device, device, _device);
2535         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2536
2537         if (mem == NULL)
2538                 return;
2539
2540         if (mem->user_ptr == NULL)
2541                 device->ws->buffer_unmap(mem->bo);
2542 }
2543
VkResult radv_FlushMappedMemoryRanges(
	VkDevice                                    _device,
	uint32_t                                    memoryRangeCount,
	const VkMappedMemoryRange*                  pMemoryRanges)
{
	/* Intentionally a no-op.  NOTE(review): presumably all host-visible
	 * memory types here are also host-coherent, so there is nothing to
	 * flush — confirm against the memory type setup. */
	return VK_SUCCESS;
}
2551
VkResult radv_InvalidateMappedMemoryRanges(
	VkDevice                                    _device,
	uint32_t                                    memoryRangeCount,
	const VkMappedMemoryRange*                  pMemoryRanges)
{
	/* Intentionally a no-op, mirroring radv_FlushMappedMemoryRanges:
	 * NOTE(review): presumably mappable memory is host-coherent here —
	 * confirm against the memory type setup. */
	return VK_SUCCESS;
}
2559
2560 void radv_GetBufferMemoryRequirements(
2561         VkDevice                                    _device,
2562         VkBuffer                                    _buffer,
2563         VkMemoryRequirements*                       pMemoryRequirements)
2564 {
2565         RADV_FROM_HANDLE(radv_device, device, _device);
2566         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2567
2568         pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2569
2570         if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2571                 pMemoryRequirements->alignment = 4096;
2572         else
2573                 pMemoryRequirements->alignment = 16;
2574
2575         pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2576 }
2577
2578 void radv_GetBufferMemoryRequirements2KHR(
2579         VkDevice                                     device,
2580         const VkBufferMemoryRequirementsInfo2KHR*    pInfo,
2581         VkMemoryRequirements2KHR*                    pMemoryRequirements)
2582 {
2583         radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2584                                         &pMemoryRequirements->memoryRequirements);
2585         RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2586         vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2587                 switch (ext->sType) {
2588                 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2589                         VkMemoryDedicatedRequirementsKHR *req =
2590                                        (VkMemoryDedicatedRequirementsKHR *) ext;
2591                         req->requiresDedicatedAllocation = buffer->shareable;
2592                         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2593                         break;
2594                 }
2595                 default:
2596                         break;
2597                 }
2598         }
2599 }
2600
2601 void radv_GetImageMemoryRequirements(
2602         VkDevice                                    _device,
2603         VkImage                                     _image,
2604         VkMemoryRequirements*                       pMemoryRequirements)
2605 {
2606         RADV_FROM_HANDLE(radv_device, device, _device);
2607         RADV_FROM_HANDLE(radv_image, image, _image);
2608
2609         pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2610
2611         pMemoryRequirements->size = image->size;
2612         pMemoryRequirements->alignment = image->alignment;
2613 }
2614
2615 void radv_GetImageMemoryRequirements2KHR(
2616         VkDevice                                    device,
2617         const VkImageMemoryRequirementsInfo2KHR*    pInfo,
2618         VkMemoryRequirements2KHR*                   pMemoryRequirements)
2619 {
2620         radv_GetImageMemoryRequirements(device, pInfo->image,
2621                                         &pMemoryRequirements->memoryRequirements);
2622
2623         RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2624
2625         vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2626                 switch (ext->sType) {
2627                 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2628                         VkMemoryDedicatedRequirementsKHR *req =
2629                                        (VkMemoryDedicatedRequirementsKHR *) ext;
2630                         req->requiresDedicatedAllocation = image->shareable;
2631                         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2632                         break;
2633                 }
2634                 default:
2635                         break;
2636                 }
2637         }
2638 }
2639
2640 void radv_GetImageSparseMemoryRequirements(
2641         VkDevice                                    device,
2642         VkImage                                     image,
2643         uint32_t*                                   pSparseMemoryRequirementCount,
2644         VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
2645 {
2646         stub();
2647 }
2648
2649 void radv_GetImageSparseMemoryRequirements2KHR(
2650         VkDevice                                    device,
2651         const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2652         uint32_t*                                   pSparseMemoryRequirementCount,
2653         VkSparseImageMemoryRequirements2KHR*            pSparseMemoryRequirements)
2654 {
2655         stub();
2656 }
2657
void radv_GetDeviceMemoryCommitment(
        VkDevice                                    device,
        VkDeviceMemory                              memory,
        VkDeviceSize*                               pCommittedMemoryInBytes)
{
        /* Lazily-allocated memory is not tracked, so nothing is ever reported
         * as committed beyond the initial allocation; always return zero. */
        *pCommittedMemoryInBytes = 0;
}
2665
2666 VkResult radv_BindBufferMemory2KHR(VkDevice device,
2667                                    uint32_t bindInfoCount,
2668                                    const VkBindBufferMemoryInfoKHR *pBindInfos)
2669 {
2670         for (uint32_t i = 0; i < bindInfoCount; ++i) {
2671                 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2672                 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2673
2674                 if (mem) {
2675                         buffer->bo = mem->bo;
2676                         buffer->offset = pBindInfos[i].memoryOffset;
2677                 } else {
2678                         buffer->bo = NULL;
2679                 }
2680         }
2681         return VK_SUCCESS;
2682 }
2683
2684 VkResult radv_BindBufferMemory(
2685         VkDevice                                    device,
2686         VkBuffer                                    buffer,
2687         VkDeviceMemory                              memory,
2688         VkDeviceSize                                memoryOffset)
2689 {
2690         const VkBindBufferMemoryInfoKHR info = {
2691                 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2692                 .buffer = buffer,
2693                 .memory = memory,
2694                 .memoryOffset = memoryOffset
2695         };
2696
2697         return radv_BindBufferMemory2KHR(device, 1, &info);
2698 }
2699
2700 VkResult radv_BindImageMemory2KHR(VkDevice device,
2701                                   uint32_t bindInfoCount,
2702                                   const VkBindImageMemoryInfoKHR *pBindInfos)
2703 {
2704         for (uint32_t i = 0; i < bindInfoCount; ++i) {
2705                 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2706                 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2707
2708                 if (mem) {
2709                         image->bo = mem->bo;
2710                         image->offset = pBindInfos[i].memoryOffset;
2711                 } else {
2712                         image->bo = NULL;
2713                         image->offset = 0;
2714                 }
2715         }
2716         return VK_SUCCESS;
2717 }
2718
2719
2720 VkResult radv_BindImageMemory(
2721         VkDevice                                    device,
2722         VkImage                                     image,
2723         VkDeviceMemory                              memory,
2724         VkDeviceSize                                memoryOffset)
2725 {
2726         const VkBindImageMemoryInfoKHR info = {
2727                 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2728                 .image = image,
2729                 .memory = memory,
2730                 .memoryOffset = memoryOffset
2731         };
2732
2733         return radv_BindImageMemory2KHR(device, 1, &info);
2734 }
2735
2736
2737 static void
2738 radv_sparse_buffer_bind_memory(struct radv_device *device,
2739                                const VkSparseBufferMemoryBindInfo *bind)
2740 {
2741         RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2742
2743         for (uint32_t i = 0; i < bind->bindCount; ++i) {
2744                 struct radv_device_memory *mem = NULL;
2745
2746                 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2747                         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2748
2749                 device->ws->buffer_virtual_bind(buffer->bo,
2750                                                 bind->pBinds[i].resourceOffset,
2751                                                 bind->pBinds[i].size,
2752                                                 mem ? mem->bo : NULL,
2753                                                 bind->pBinds[i].memoryOffset);
2754         }
2755 }
2756
2757 static void
2758 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2759                                      const VkSparseImageOpaqueMemoryBindInfo *bind)
2760 {
2761         RADV_FROM_HANDLE(radv_image, image, bind->image);
2762
2763         for (uint32_t i = 0; i < bind->bindCount; ++i) {
2764                 struct radv_device_memory *mem = NULL;
2765
2766                 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2767                         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2768
2769                 device->ws->buffer_virtual_bind(image->bo,
2770                                                 bind->pBinds[i].resourceOffset,
2771                                                 bind->pBinds[i].size,
2772                                                 mem ? mem->bo : NULL,
2773                                                 bind->pBinds[i].memoryOffset);
2774         }
2775 }
2776
2777  VkResult radv_QueueBindSparse(
2778         VkQueue                                     _queue,
2779         uint32_t                                    bindInfoCount,
2780         const VkBindSparseInfo*                     pBindInfo,
2781         VkFence                                     _fence)
2782 {
2783         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2784         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2785         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2786         bool fence_emitted = false;
2787
2788         for (uint32_t i = 0; i < bindInfoCount; ++i) {
2789                 struct radv_winsys_sem_info sem_info;
2790                 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2791                         radv_sparse_buffer_bind_memory(queue->device,
2792                                                        pBindInfo[i].pBufferBinds + j);
2793                 }
2794
2795                 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2796                         radv_sparse_image_opaque_bind_memory(queue->device,
2797                                                              pBindInfo[i].pImageOpaqueBinds + j);
2798                 }
2799
2800                 VkResult result;
2801                 result = radv_alloc_sem_info(&sem_info,
2802                                              pBindInfo[i].waitSemaphoreCount,
2803                                              pBindInfo[i].pWaitSemaphores,
2804                                              pBindInfo[i].signalSemaphoreCount,
2805                                              pBindInfo[i].pSignalSemaphores,
2806                                              _fence);
2807                 if (result != VK_SUCCESS)
2808                         return result;
2809
2810                 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2811                         queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2812                                                      &queue->device->empty_cs[queue->queue_family_index],
2813                                                      1, NULL, NULL,
2814                                                      &sem_info,
2815                                                      false, base_fence);
2816                         fence_emitted = true;
2817                         if (fence)
2818                                 fence->submitted = true;
2819                 }
2820
2821                 radv_free_sem_info(&sem_info);
2822
2823         }
2824
2825         if (fence) {
2826                 if (!fence_emitted) {
2827                         radv_signal_fence(queue, fence);
2828                 }
2829                 fence->submitted = true;
2830         }
2831
2832         return VK_SUCCESS;
2833 }
2834
/* Create a VkFence, backed by either a kernel syncobj (when the fence may be
 * exported, or the device always uses syncobjs) or a plain winsys fence. */
VkResult radv_CreateFence(
        VkDevice                                    _device,
        const VkFenceCreateInfo*                    pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkFence*                                    pFence)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        /* An export struct in the pNext chain requests external handle
         * types; that forces a syncobj-backed fence. */
        const VkExportFenceCreateInfoKHR *export =
                vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
        VkExternalFenceHandleTypeFlagsKHR handleTypes =
                export ? export->handleTypes : 0;

        struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
                                               sizeof(*fence), 8,
                                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

        if (!fence)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        fence->submitted = false;
        fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
        fence->temp_syncobj = 0;
        if (device->always_use_syncobj || handleTypes) {
                /* Syncobj-backed (exportable) fence. */
                int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
                if (ret) {
                        vk_free2(&device->alloc, pAllocator, fence);
                        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
                }
                /* A fence created signaled must start signaled in the kernel
                 * object as well, not just in the host-side flag. */
                if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
                        device->ws->signal_syncobj(device->ws, fence->syncobj);
                }
                fence->fence = NULL;
        } else {
                /* Plain winsys fence; no export possible. */
                fence->fence = device->ws->create_fence();
                if (!fence->fence) {
                        vk_free2(&device->alloc, pAllocator, fence);
                        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
                }
                fence->syncobj = 0;
        }

        *pFence = radv_fence_to_handle(fence);

        return VK_SUCCESS;
}
2880
2881 void radv_DestroyFence(
2882         VkDevice                                    _device,
2883         VkFence                                     _fence,
2884         const VkAllocationCallbacks*                pAllocator)
2885 {
2886         RADV_FROM_HANDLE(radv_device, device, _device);
2887         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2888
2889         if (!fence)
2890                 return;
2891
2892         if (fence->temp_syncobj)
2893                 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
2894         if (fence->syncobj)
2895                 device->ws->destroy_syncobj(device->ws, fence->syncobj);
2896         if (fence->fence)
2897                 device->ws->destroy_fence(fence->fence);
2898         vk_free2(&device->alloc, pAllocator, fence);
2899 }
2900
2901 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2902 {
2903         uint64_t current_time;
2904         struct timespec tv;
2905
2906         clock_gettime(CLOCK_MONOTONIC, &tv);
2907         current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2908
2909         timeout = MIN2(UINT64_MAX - current_time, timeout);
2910
2911         return current_time + timeout;
2912 }
2913
/* Wait for all fences to signal before the timeout expires.
 * Returns VK_SUCCESS when every fence signalled, VK_TIMEOUT otherwise. */
VkResult radv_WaitForFences(
        VkDevice                                    _device,
        uint32_t                                    fenceCount,
        const VkFence*                              pFences,
        VkBool32                                    waitAll,
        uint64_t                                    timeout)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        /* Turn the relative timeout into an absolute deadline so it can be
         * reused for every fence waited on below. */
        timeout = radv_get_absolute_timeout(timeout);

        /* NOTE(review): with !waitAll this still waits for every fence in
         * turn rather than returning when any single one signals. */
        if (!waitAll && fenceCount > 1) {
                fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
        }

        for (uint32_t i = 0; i < fenceCount; ++i) {
                RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
                bool expired = false;

                /* A temporary (imported) syncobj payload takes precedence
                 * over the permanent payload. */
                if (fence->temp_syncobj) {
                        if (!device->ws->wait_syncobj(device->ws, fence->temp_syncobj, timeout))
                                return VK_TIMEOUT;
                        continue;
                }

                if (fence->syncobj) {
                        if (!device->ws->wait_syncobj(device->ws, fence->syncobj, timeout))
                                return VK_TIMEOUT;
                        continue;
                }

                /* Winsys-fence path: use cached state when possible. */
                if (fence->signalled)
                        continue;

                /* An unsubmitted fence can never signal. */
                if (!fence->submitted)
                        return VK_TIMEOUT;

                expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
                if (!expired)
                        return VK_TIMEOUT;

                /* Cache the result so later queries skip the winsys wait. */
                fence->signalled = true;
        }

        return VK_SUCCESS;
}
2959
2960 VkResult radv_ResetFences(VkDevice _device,
2961                           uint32_t fenceCount,
2962                           const VkFence *pFences)
2963 {
2964         RADV_FROM_HANDLE(radv_device, device, _device);
2965
2966         for (unsigned i = 0; i < fenceCount; ++i) {
2967                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2968                 fence->submitted = fence->signalled = false;
2969
2970                 /* Per spec, we first restore the permanent payload, and then reset, so
2971                  * having a temp syncobj should not skip resetting the permanent syncobj. */
2972                 if (fence->temp_syncobj) {
2973                         device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
2974                         fence->temp_syncobj = 0;
2975                 }
2976
2977                 if (fence->syncobj) {
2978                         device->ws->reset_syncobj(device->ws, fence->syncobj);
2979                 }
2980         }
2981
2982         return VK_SUCCESS;
2983 }
2984
2985 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2986 {
2987         RADV_FROM_HANDLE(radv_device, device, _device);
2988         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2989
2990         if (fence->temp_syncobj) {
2991                         bool success = device->ws->wait_syncobj(device->ws, fence->temp_syncobj, 0);
2992                         return success ? VK_SUCCESS : VK_NOT_READY;
2993         }
2994
2995         if (fence->syncobj) {
2996                         bool success = device->ws->wait_syncobj(device->ws, fence->syncobj, 0);
2997                         return success ? VK_SUCCESS : VK_NOT_READY;
2998         }
2999
3000         if (fence->signalled)
3001                 return VK_SUCCESS;
3002         if (!fence->submitted)
3003                 return VK_NOT_READY;
3004         if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3005                 return VK_NOT_READY;
3006
3007         return VK_SUCCESS;
3008 }
3009
3010
3011 // Queue semaphore functions
3012
3013 VkResult radv_CreateSemaphore(
3014         VkDevice                                    _device,
3015         const VkSemaphoreCreateInfo*                pCreateInfo,
3016         const VkAllocationCallbacks*                pAllocator,
3017         VkSemaphore*                                pSemaphore)
3018 {
3019         RADV_FROM_HANDLE(radv_device, device, _device);
3020         const VkExportSemaphoreCreateInfoKHR *export =
3021                 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
3022         VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
3023                 export ? export->handleTypes : 0;
3024
3025         struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3026                                                sizeof(*sem), 8,
3027                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3028         if (!sem)
3029                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3030
3031         sem->temp_syncobj = 0;
3032         /* create a syncobject if we are going to export this semaphore */
3033         if (device->always_use_syncobj || handleTypes) {
3034                 assert (device->physical_device->rad_info.has_syncobj);
3035                 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3036                 if (ret) {
3037                         vk_free2(&device->alloc, pAllocator, sem);
3038                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3039                 }
3040                 sem->sem = NULL;
3041         } else {
3042                 sem->sem = device->ws->create_sem(device->ws);
3043                 if (!sem->sem) {
3044                         vk_free2(&device->alloc, pAllocator, sem);
3045                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3046                 }
3047                 sem->syncobj = 0;
3048         }
3049
3050         *pSemaphore = radv_semaphore_to_handle(sem);
3051         return VK_SUCCESS;
3052 }
3053
3054 void radv_DestroySemaphore(
3055         VkDevice                                    _device,
3056         VkSemaphore                                 _semaphore,
3057         const VkAllocationCallbacks*                pAllocator)
3058 {
3059         RADV_FROM_HANDLE(radv_device, device, _device);
3060         RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
3061         if (!_semaphore)
3062                 return;
3063
3064         if (sem->syncobj)
3065                 device->ws->destroy_syncobj(device->ws, sem->syncobj);
3066         else
3067                 device->ws->destroy_sem(sem->sem);
3068         vk_free2(&device->alloc, pAllocator, sem);
3069 }
3070
3071 VkResult radv_CreateEvent(
3072         VkDevice                                    _device,
3073         const VkEventCreateInfo*                    pCreateInfo,
3074         const VkAllocationCallbacks*                pAllocator,
3075         VkEvent*                                    pEvent)
3076 {
3077         RADV_FROM_HANDLE(radv_device, device, _device);
3078         struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
3079                                                sizeof(*event), 8,
3080                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3081
3082         if (!event)
3083                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3084
3085         event->bo = device->ws->buffer_create(device->ws, 8, 8,
3086                                               RADEON_DOMAIN_GTT,
3087                                               RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
3088         if (!event->bo) {
3089                 vk_free2(&device->alloc, pAllocator, event);
3090                 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3091         }
3092
3093         event->map = (uint64_t*)device->ws->buffer_map(event->bo);
3094
3095         *pEvent = radv_event_to_handle(event);
3096
3097         return VK_SUCCESS;
3098 }
3099
3100 void radv_DestroyEvent(
3101         VkDevice                                    _device,
3102         VkEvent                                     _event,
3103         const VkAllocationCallbacks*                pAllocator)
3104 {
3105         RADV_FROM_HANDLE(radv_device, device, _device);
3106         RADV_FROM_HANDLE(radv_event, event, _event);
3107
3108         if (!event)
3109                 return;
3110         device->ws->buffer_destroy(event->bo);
3111         vk_free2(&device->alloc, pAllocator, event);
3112 }
3113
3114 VkResult radv_GetEventStatus(
3115         VkDevice                                    _device,
3116         VkEvent                                     _event)
3117 {
3118         RADV_FROM_HANDLE(radv_event, event, _event);
3119
3120         if (*event->map == 1)
3121                 return VK_EVENT_SET;
3122         return VK_EVENT_RESET;
3123 }
3124
VkResult radv_SetEvent(
        VkDevice                                    _device,
        VkEvent                                     _event)
{
        RADV_FROM_HANDLE(radv_event, event, _event);
        /* Host-side set: write 1 into the mapped event qword. */
        *event->map = 1;

        return VK_SUCCESS;
}
3134
3135 VkResult radv_ResetEvent(
3136     VkDevice                                    _device,
3137     VkEvent                                     _event)
3138 {
3139         RADV_FROM_HANDLE(radv_event, event, _event);
3140         *event->map = 0;
3141
3142         return VK_SUCCESS;
3143 }
3144
3145 VkResult radv_CreateBuffer(
3146         VkDevice                                    _device,
3147         const VkBufferCreateInfo*                   pCreateInfo,
3148         const VkAllocationCallbacks*                pAllocator,
3149         VkBuffer*                                   pBuffer)
3150 {
3151         RADV_FROM_HANDLE(radv_device, device, _device);
3152         struct radv_buffer *buffer;
3153
3154         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
3155
3156         buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
3157                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3158         if (buffer == NULL)
3159                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3160
3161         buffer->size = pCreateInfo->size;
3162         buffer->usage = pCreateInfo->usage;
3163         buffer->bo = NULL;
3164         buffer->offset = 0;
3165         buffer->flags = pCreateInfo->flags;
3166
3167         buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
3168                                                  EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
3169
3170         if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
3171                 buffer->bo = device->ws->buffer_create(device->ws,
3172                                                        align64(buffer->size, 4096),
3173                                                        4096, 0, RADEON_FLAG_VIRTUAL);
3174                 if (!buffer->bo) {
3175                         vk_free2(&device->alloc, pAllocator, buffer);
3176                         return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3177                 }
3178         }
3179
3180         *pBuffer = radv_buffer_to_handle(buffer);
3181
3182         return VK_SUCCESS;
3183 }
3184
3185 void radv_DestroyBuffer(
3186         VkDevice                                    _device,
3187         VkBuffer                                    _buffer,
3188         const VkAllocationCallbacks*                pAllocator)
3189 {
3190         RADV_FROM_HANDLE(radv_device, device, _device);
3191         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3192
3193         if (!buffer)
3194                 return;
3195
3196         if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3197                 device->ws->buffer_destroy(buffer->bo);
3198
3199         vk_free2(&device->alloc, pAllocator, buffer);
3200 }
3201
3202 static inline unsigned
3203 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3204 {
3205         if (stencil)
3206                 return image->surface.u.legacy.stencil_tiling_index[level];
3207         else
3208                 return image->surface.u.legacy.tiling_index[level];
3209 }
3210
3211 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
3212 {
3213         return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
3214 }
3215
3216 static void
3217 radv_initialise_color_surface(struct radv_device *device,
3218                               struct radv_color_buffer_info *cb,
3219                               struct radv_image_view *iview)
3220 {
3221         const struct vk_format_description *desc;
3222         unsigned ntype, format, swap, endian;
3223         unsigned blend_clamp = 0, blend_bypass = 0;
3224         uint64_t va;
3225         const struct radeon_surf *surf = &iview->image->surface;
3226
3227         desc = vk_format_description(iview->vk_format);
3228
3229         memset(cb, 0, sizeof(*cb));
3230
3231         /* Intensity is implemented as Red, so treat it that way. */
3232         cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3233
3234         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3235
3236         cb->cb_color_base = va >> 8;
3237
3238         if (device->physical_device->rad_info.chip_class >= GFX9) {
3239                 struct gfx9_surf_meta_flags meta;
3240                 if (iview->image->dcc_offset)
3241                         meta = iview->image->surface.u.gfx9.dcc;
3242                 else
3243                         meta = iview->image->surface.u.gfx9.cmask;
3244
3245                 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3246                         S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
3247                         S_028C74_RB_ALIGNED(meta.rb_aligned) |
3248                         S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3249
3250                 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
3251                 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3252         } else {
3253                 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
3254                 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3255
3256                 cb->cb_color_base += level_info->offset >> 8;
3257                 if (level_info->mode == RADEON_SURF_MODE_2D)
3258                         cb->cb_color_base |= iview->image->surface.tile_swizzle;
3259
3260                 pitch_tile_max = level_info->nblk_x / 8 - 1;
3261                 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
3262                 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
3263
3264                 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3265                 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3266                 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
3267
3268                 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3269
3270                 if (iview->image->fmask.size) {
3271                         if (device->physical_device->rad_info.chip_class >= CIK)
3272                                 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
3273                         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
3274                         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
3275                 } else {
3276                         /* This must be set for fast clear to work without FMASK. */
3277                         if (device->physical_device->rad_info.chip_class >= CIK)
3278                                 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3279                         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3280                         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3281                 }
3282         }
3283
3284         /* CMASK variables */
3285         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3286         va += iview->image->cmask.offset;
3287         cb->cb_color_cmask = va >> 8;
3288
3289         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3290         va += iview->image->dcc_offset;
3291         cb->cb_dcc_base = va >> 8;
3292         cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
3293
3294         uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3295         cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
3296                 S_028C6C_SLICE_MAX(max_slice);
3297
3298         if (iview->image->info.samples > 1) {
3299                 unsigned log_samples = util_logbase2(iview->image->info.samples);
3300
3301                 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
3302                         S_028C74_NUM_FRAGMENTS(log_samples);
3303         }
3304
3305         if (iview->image->fmask.size) {
3306                 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
3307                 cb->cb_color_fmask = va >> 8;
3308                 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
3309         } else {
3310                 cb->cb_color_fmask = cb->cb_color_base;
3311         }
3312
3313         ntype = radv_translate_color_numformat(iview->vk_format,
3314                                                desc,
3315                                                vk_format_get_first_non_void_channel(iview->vk_format));
3316         format = radv_translate_colorformat(iview->vk_format);
3317         if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3318                 radv_finishme("Illegal color\n");
3319         swap = radv_translate_colorswap(iview->vk_format, FALSE);
3320         endian = radv_colorformat_endian_swap(format);
3321
3322         /* blend clamp should be set for all NORM/SRGB types */
3323         if (ntype == V_028C70_NUMBER_UNORM ||
3324             ntype == V_028C70_NUMBER_SNORM ||
3325             ntype == V_028C70_NUMBER_SRGB)
3326                 blend_clamp = 1;
3327
3328         /* set blend bypass according to docs if SINT/UINT or
3329            8/24 COLOR variants */
3330         if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3331             format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3332             format == V_028C70_COLOR_X24_8_32_FLOAT) {
3333                 blend_clamp = 0;
3334                 blend_bypass = 1;
3335         }
3336 #if 0
3337         if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3338             (format == V_028C70_COLOR_8 ||
3339              format == V_028C70_COLOR_8_8 ||
3340              format == V_028C70_COLOR_8_8_8_8))
3341                 ->color_is_int8 = true;
3342 #endif
3343         cb->cb_color_info = S_028C70_FORMAT(format) |
3344                 S_028C70_COMP_SWAP(swap) |
3345                 S_028C70_BLEND_CLAMP(blend_clamp) |
3346                 S_028C70_BLEND_BYPASS(blend_bypass) |
3347                 S_028C70_SIMPLE_FLOAT(1) |
3348                 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3349                                     ntype != V_028C70_NUMBER_SNORM &&
3350                                     ntype != V_028C70_NUMBER_SRGB &&
3351                                     format != V_028C70_COLOR_8_24 &&
3352                                     format != V_028C70_COLOR_24_8) |
3353                 S_028C70_NUMBER_TYPE(ntype) |
3354                 S_028C70_ENDIAN(endian);
3355         if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
3356                 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3357                 if (device->physical_device->rad_info.chip_class == SI) {
3358                         unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3359                         cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3360                 }
3361         }
3362
3363         if (iview->image->cmask.size &&
3364             !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3365                 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3366
3367         if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
3368                 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3369
3370         if (device->physical_device->rad_info.chip_class >= VI) {
3371                 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3372                 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
3373                 unsigned independent_64b_blocks = 0;
3374                 unsigned max_compressed_block_size;
3375
3376                 /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
3377                    64 for APU because all of our APUs to date use DIMMs which have
3378                    a request granularity size of 64B while all other chips have a
3379                    32B request size */
3380                 if (!device->physical_device->rad_info.has_dedicated_vram)
3381                         min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
3382
3383                 if (iview->image->info.samples > 1) {
3384                         if (iview->image->surface.bpe == 1)
3385                                 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3386                         else if (iview->image->surface.bpe == 2)
3387                                 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
3388                 }
3389
3390                 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3391                                            VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
3392                         independent_64b_blocks = 1;
3393                         max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3394                 } else
3395                         max_compressed_block_size = max_uncompressed_block_size;
3396
3397                 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3398                         S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
3399                         S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
3400                         S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
3401         }
3402
3403         /* This must be set for fast clear to work without FMASK. */
3404         if (!iview->image->fmask.size &&
3405             device->physical_device->rad_info.chip_class == SI) {
3406                 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3407                 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3408         }
3409
3410         if (device->physical_device->rad_info.chip_class >= GFX9) {
3411                 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3412                   (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3413
3414                 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3415                 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3416                         S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3417                 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3418                         S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3419                         S_028C68_MAX_MIP(iview->image->info.levels - 1);
3420         }
3421 }
3422
static void
radv_initialise_ds_surface(struct radv_device *device,
                           struct radv_ds_buffer_info *ds,
                           struct radv_image_view *iview)
{
	/* Fill *ds with the DB register state needed to bind 'iview' as a
	 * depth/stencil attachment: format fields, depth/stencil surface
	 * addresses, tiling information and (when present) the HTILE
	 * metadata surface.
	 */
	unsigned level = iview->base_mip;
	unsigned format, stencil_format;
	uint64_t va, s_offs, z_offs;
	bool stencil_only = false;
	memset(ds, 0, sizeof(*ds));
	/* PA_SU_POLY_OFFSET_DB_FMT_CNTL and offset_scale depend on the
	 * number of depth bits in the format; formats without depth fall
	 * through with the zeroed defaults. */
	switch (iview->image->vk_format) {
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		ds->offset_scale = 2.0f;
		break;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D16_UNORM_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		ds->offset_scale = 4.0f;
		break;
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		ds->offset_scale = 1.0f;
		break;
	case VK_FORMAT_S8_UINT:
		stencil_only = true;
		break;
	default:
		break;
	}

	format = radv_translate_dbformat(iview->image->vk_format);
	stencil_format = iview->image->surface.has_stencil ?
		V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

	/* Restrict the view to the selected layer range. */
	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
		S_028008_SLICE_MAX(max_slice);

	ds->db_htile_data_base = 0;
	ds->db_htile_surface = 0;

	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	s_offs = z_offs = va;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9: one mipmapped surface; the mip level is selected via
		 * MIPID in the depth view rather than per-level offsets. */
		assert(iview->image->surface.u.gfx9.surf_offset == 0);
		s_offs += iview->image->surface.u.gfx9.stencil_offset;

		ds->db_z_info = S_028038_FORMAT(format) |
			S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
			S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
			S_028038_MAXMIP(iview->image->info.levels - 1);
		ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
			S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);

		ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
		ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
		ds->db_depth_view |= S_028008_MIPID(level);

		ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
			S_02801C_Y_MAX(iview->image->info.height - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

			if (iview->image->tc_compatible_htile) {
				/* Fewer zplanes for multisampled D16 —
				 * assumed hardware restriction; mirrors the
				 * sample-count based values in the legacy
				 * path below. */
				unsigned max_zplanes = 4;

				if (iview->vk_format == VK_FORMAT_D16_UNORM  &&
				    iview->image->info.samples > 1)
					max_zplanes = 2;

				ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
					  S_028038_ITERATE_FLUSH(1);
				ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
			}

			if (!iview->image->surface.has_stencil)
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				iview->image->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
				S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
				S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
		}
	} else {
		/* SI-VI: per-level surface info and explicit per-level
		 * depth/stencil offsets. */
		const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];

		if (stencil_only)
			level_info = &iview->image->surface.u.legacy.stencil_level[level];

		z_offs += iview->image->surface.u.legacy.level[level].offset;
		s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;

		ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
		ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_028044_FORMAT(stencil_format);

		if (iview->image->info.samples > 1)
			ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

		if (device->physical_device->rad_info.chip_class >= CIK) {
			/* CIK+: derive the tiling fields from the tile mode
			 * arrays reported by the kernel/winsys. */
			struct radeon_info *info = &device->physical_device->rad_info;
			unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
			unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
			unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
			unsigned tile_mode = info->si_tile_mode_array[tiling_index];
			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

			if (stencil_only)
				tile_mode = stencil_tile_mode;

			ds->db_depth_info |=
				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
			ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
			ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
		} else {
			/* SI: select tile modes by index. */
			unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
			tile_mode_index = si_tile_mode_index(iview->image, level, true);
			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
			if (stencil_only)
				ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		}

		/* Pitch/height/slice are expressed in 8x8 tile units. */
		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
			S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
		ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

			if (!iview->image->surface.has_stencil &&
			    !iview->image->tc_compatible_htile)
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				iview->image->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1);

			if (iview->image->tc_compatible_htile) {
				ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);

				/* DECOMPRESS_ON_N_ZPLANES shrinks with the
				 * sample count. */
				if (iview->image->info.samples <= 1)
					ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
				else if (iview->image->info.samples <= 4)
					ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
				else
					ds->db_z_info|= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
			}
		}
	}

	/* Addresses are programmed in units of 256 bytes. */
	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}
3593
3594 VkResult radv_CreateFramebuffer(
3595         VkDevice                                    _device,
3596         const VkFramebufferCreateInfo*              pCreateInfo,
3597         const VkAllocationCallbacks*                pAllocator,
3598         VkFramebuffer*                              pFramebuffer)
3599 {
3600         RADV_FROM_HANDLE(radv_device, device, _device);
3601         struct radv_framebuffer *framebuffer;
3602
3603         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3604
3605         size_t size = sizeof(*framebuffer) +
3606                 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3607         framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3608                                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3609         if (framebuffer == NULL)
3610                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3611
3612         framebuffer->attachment_count = pCreateInfo->attachmentCount;
3613         framebuffer->width = pCreateInfo->width;
3614         framebuffer->height = pCreateInfo->height;
3615         framebuffer->layers = pCreateInfo->layers;
3616         for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3617                 VkImageView _iview = pCreateInfo->pAttachments[i];
3618                 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3619                 framebuffer->attachments[i].attachment = iview;
3620                 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3621                         radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3622                 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3623                         radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3624                 }
3625                 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3626                 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3627                 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
3628         }
3629
3630         *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3631         return VK_SUCCESS;
3632 }
3633
3634 void radv_DestroyFramebuffer(
3635         VkDevice                                    _device,
3636         VkFramebuffer                               _fb,
3637         const VkAllocationCallbacks*                pAllocator)
3638 {
3639         RADV_FROM_HANDLE(radv_device, device, _device);
3640         RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3641
3642         if (!fb)
3643                 return;
3644         vk_free2(&device->alloc, pAllocator, fb);
3645 }
3646
3647 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3648 {
3649         switch (address_mode) {
3650         case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3651                 return V_008F30_SQ_TEX_WRAP;
3652         case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3653                 return V_008F30_SQ_TEX_MIRROR;
3654         case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3655                 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3656         case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3657                 return V_008F30_SQ_TEX_CLAMP_BORDER;
3658         case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3659                 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3660         default:
3661                 unreachable("illegal tex wrap mode");
3662                 break;
3663         }
3664 }
3665
3666 static unsigned
3667 radv_tex_compare(VkCompareOp op)
3668 {
3669         switch (op) {
3670         case VK_COMPARE_OP_NEVER:
3671                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3672         case VK_COMPARE_OP_LESS:
3673                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3674         case VK_COMPARE_OP_EQUAL:
3675                 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3676         case VK_COMPARE_OP_LESS_OR_EQUAL:
3677                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3678         case VK_COMPARE_OP_GREATER:
3679                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3680         case VK_COMPARE_OP_NOT_EQUAL:
3681                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3682         case VK_COMPARE_OP_GREATER_OR_EQUAL:
3683                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3684         case VK_COMPARE_OP_ALWAYS:
3685                 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3686         default:
3687                 unreachable("illegal compare mode");
3688                 break;
3689         }
3690 }
3691
3692 static unsigned
3693 radv_tex_filter(VkFilter filter, unsigned max_ansio)
3694 {
3695         switch (filter) {
3696         case VK_FILTER_NEAREST:
3697                 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3698                         V_008F38_SQ_TEX_XY_FILTER_POINT);
3699         case VK_FILTER_LINEAR:
3700                 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3701                         V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3702         case VK_FILTER_CUBIC_IMG:
3703         default:
3704                 fprintf(stderr, "illegal texture filter");
3705                 return 0;
3706         }
3707 }
3708
3709 static unsigned
3710 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3711 {
3712         switch (mode) {
3713         case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3714                 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3715         case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3716                 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3717         default:
3718                 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3719         }
3720 }
3721
3722 static unsigned
3723 radv_tex_bordercolor(VkBorderColor bcolor)
3724 {
3725         switch (bcolor) {
3726         case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3727         case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3728                 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3729         case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3730         case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3731                 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3732         case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3733         case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3734                 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3735         default:
3736                 break;
3737         }
3738         return 0;
3739 }
3740
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	/* Encode the max anisotropy as floor(log2(filter)), clamped to 4
	 * (the encoding for 16x anisotropic filtering).  Values of 0 or 1
	 * mean anisotropic filtering is disabled and encode as 0. */
	unsigned ratio = 0;

	while (filter > 1 && ratio < 4) {
		filter >>= 1;
		ratio++;
	}
	return ratio;
}
3754
static void
radv_init_sampler(struct radv_device *device,
		  struct radv_sampler *sampler,
		  const VkSamplerCreateInfo *pCreateInfo)
{
	/* Pack the VkSamplerCreateInfo into the four 32-bit hardware
	 * sampler state words (SQ_IMG_SAMP_WORD0..3).
	 */
	/* max_aniso is 0 unless anisotropy is both enabled and > 1x. */
	uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
					(uint32_t) pCreateInfo->maxAnisotropy : 0;
	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
	bool is_vi = (device->physical_device->rad_info.chip_class >= VI);

	/* Word 0: address clamping, anisotropy, depth compare. */
	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			     S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
			     S_008F30_DISABLE_CUBE_WRAP(0) |
			     S_008F30_COMPAT_MODE(is_vi));
	/* Word 1: LOD range as unsigned 4.8 fixed point, clamped to
	 * [0, 15], plus the aniso perf heuristic. */
	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	/* Word 2: LOD bias as signed fixed point in [-16, 16], the
	 * mag/min/mip filters, and chip-specific filter fixups. */
	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
			     S_008F38_MIP_POINT_PRECLAMP(0) |
			     S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
			     S_008F38_FILTER_PREC_FIX(1) |
			     S_008F38_ANISO_OVERRIDE(is_vi));
	/* Word 3: border color type; a border color pointer of 0 is used
	 * since only the built-in border colors are supported here. */
	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
}
3789
3790 VkResult radv_CreateSampler(
3791         VkDevice                                    _device,
3792         const VkSamplerCreateInfo*                  pCreateInfo,
3793         const VkAllocationCallbacks*                pAllocator,
3794         VkSampler*                                  pSampler)
3795 {
3796         RADV_FROM_HANDLE(radv_device, device, _device);
3797         struct radv_sampler *sampler;
3798
3799         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3800
3801         sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3802                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3803         if (!sampler)
3804                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3805
3806         radv_init_sampler(device, sampler, pCreateInfo);
3807         *pSampler = radv_sampler_to_handle(sampler);
3808
3809         return VK_SUCCESS;
3810 }
3811
3812 void radv_DestroySampler(
3813         VkDevice                                    _device,
3814         VkSampler                                   _sampler,
3815         const VkAllocationCallbacks*                pAllocator)
3816 {
3817         RADV_FROM_HANDLE(radv_device, device, _device);
3818         RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3819
3820         if (!sampler)
3821                 return;
3822         vk_free2(&device->alloc, pAllocator, sampler);
3823 }
3824
3825 /* vk_icd.h does not declare this function, so we declare it here to
3826  * suppress Wmissing-prototypes.
3827  */
3828 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3829 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3830
3831 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3832 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3833 {
3834         /* For the full details on loader interface versioning, see
3835         * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3836         * What follows is a condensed summary, to help you navigate the large and
3837         * confusing official doc.
3838         *
3839         *   - Loader interface v0 is incompatible with later versions. We don't
3840         *     support it.
3841         *
3842         *   - In loader interface v1:
3843         *       - The first ICD entrypoint called by the loader is
3844         *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3845         *         entrypoint.
3846         *       - The ICD must statically expose no other Vulkan symbol unless it is
3847         *         linked with -Bsymbolic.
3848         *       - Each dispatchable Vulkan handle created by the ICD must be
3849         *         a pointer to a struct whose first member is VK_LOADER_DATA. The
3850         *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3851         *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3852         *         vkDestroySurfaceKHR(). The ICD must be capable of working with
3853         *         such loader-managed surfaces.
3854         *
3855         *    - Loader interface v2 differs from v1 in:
3856         *       - The first ICD entrypoint called by the loader is
3857         *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3858         *         statically expose this entrypoint.
3859         *
3860         *    - Loader interface v3 differs from v2 in:
3861         *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3862         *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
3863         *          because the loader no longer does so.
3864         */
3865         *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3866         return VK_SUCCESS;
3867 }
3868
3869 VkResult radv_GetMemoryFdKHR(VkDevice _device,
3870                              const VkMemoryGetFdInfoKHR *pGetFdInfo,
3871                              int *pFD)
3872 {
3873         RADV_FROM_HANDLE(radv_device, device, _device);
3874         RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
3875
3876         assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3877
3878         /* At the moment, we support only the below handle types. */
3879         assert(pGetFdInfo->handleType ==
3880                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
3881                pGetFdInfo->handleType ==
3882                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3883
3884         bool ret = radv_get_memory_fd(device, memory, pFD);
3885         if (ret == false)
3886                 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3887         return VK_SUCCESS;
3888 }
3889
3890 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
3891                                        VkExternalMemoryHandleTypeFlagBitsKHR handleType,
3892                                        int fd,
3893                                        VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3894 {
3895    switch (handleType) {
3896    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
3897       pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
3898       return VK_SUCCESS;
3899
3900    default:
3901       /* The valid usage section for this function says:
3902        *
3903        *    "handleType must not be one of the handle types defined as
3904        *    opaque."
3905        *
3906        * So opaque handle types fall into the default "unsupported" case.
3907        */
3908       return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3909    }
3910 }
3911
3912 static VkResult radv_import_opaque_fd(struct radv_device *device,
3913                                       int fd,
3914                                       uint32_t *syncobj)
3915 {
3916         uint32_t syncobj_handle = 0;
3917         int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
3918         if (ret != 0)
3919                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3920
3921         if (*syncobj)
3922                 device->ws->destroy_syncobj(device->ws, *syncobj);
3923
3924         *syncobj = syncobj_handle;
3925         close(fd);
3926
3927         return VK_SUCCESS;
3928 }
3929
3930 static VkResult radv_import_sync_fd(struct radv_device *device,
3931                                     int fd,
3932                                     uint32_t *syncobj)
3933 {
3934         /* If we create a syncobj we do it locally so that if we have an error, we don't
3935          * leave a syncobj in an undetermined state in the fence. */
3936         uint32_t syncobj_handle =  *syncobj;
3937         if (!syncobj_handle) {
3938                 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
3939                 if (ret) {
3940                         return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3941                 }
3942         }
3943
3944         if (fd == -1) {
3945                 device->ws->signal_syncobj(device->ws, syncobj_handle);
3946         } else {
3947                 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
3948         if (ret != 0)
3949                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3950         }
3951
3952         *syncobj = syncobj_handle;
3953         if (fd != -1)
3954                 close(fd);
3955
3956         return VK_SUCCESS;
3957 }
3958
3959 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
3960                                    const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
3961 {
3962         RADV_FROM_HANDLE(radv_device, device, _device);
3963         RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
3964         uint32_t *syncobj_dst = NULL;
3965
3966         if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
3967                 syncobj_dst = &sem->temp_syncobj;
3968         } else {
3969                 syncobj_dst = &sem->syncobj;
3970         }
3971
3972         switch(pImportSemaphoreFdInfo->handleType) {
3973                 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
3974                         return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
3975                 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
3976                         return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
3977                 default:
3978                         unreachable("Unhandled semaphore handle type");
3979         }
3980 }
3981
3982 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
3983                                 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
3984                                 int *pFd)
3985 {
3986         RADV_FROM_HANDLE(radv_device, device, _device);
3987         RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
3988         int ret;
3989         uint32_t syncobj_handle;
3990
3991         if (sem->temp_syncobj)
3992                 syncobj_handle = sem->temp_syncobj;
3993         else
3994                 syncobj_handle = sem->syncobj;
3995
3996         switch(pGetFdInfo->handleType) {
3997         case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
3998                 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
3999                 break;
4000         case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4001                 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4002                 if (!ret) {
4003                         if (sem->temp_syncobj) {
4004                                 close (sem->temp_syncobj);
4005                                 sem->temp_syncobj = 0;
4006                         } else {
4007                                 device->ws->reset_syncobj(device->ws, syncobj_handle);
4008                         }
4009                 }
4010                 break;
4011         default:
4012                 unreachable("Unhandled semaphore handle type");
4013         }
4014
4015         if (ret)
4016                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4017         return VK_SUCCESS;
4018 }
4019
4020 void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
4021         VkPhysicalDevice                            physicalDevice,
4022         const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
4023         VkExternalSemaphorePropertiesKHR*           pExternalSemaphoreProperties)
4024 {
4025         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4026
4027         /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
4028         if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4029             (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || 
4030              pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4031                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4032                 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4033                 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4034                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4035         } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
4036                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4037                 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4038                 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4039                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4040         } else {
4041                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
4042                 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
4043                 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
4044         }
4045 }
4046
4047 VkResult radv_ImportFenceFdKHR(VkDevice _device,
4048                                    const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
4049 {
4050         RADV_FROM_HANDLE(radv_device, device, _device);
4051         RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
4052         uint32_t *syncobj_dst = NULL;
4053
4054
4055         if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
4056                 syncobj_dst = &fence->temp_syncobj;
4057         } else {
4058                 syncobj_dst = &fence->syncobj;
4059         }
4060
4061         switch(pImportFenceFdInfo->handleType) {
4062                 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4063                         return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4064                 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4065                         return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4066                 default:
4067                         unreachable("Unhandled fence handle type");
4068         }
4069 }
4070
4071 VkResult radv_GetFenceFdKHR(VkDevice _device,
4072                                 const VkFenceGetFdInfoKHR *pGetFdInfo,
4073                                 int *pFd)
4074 {
4075         RADV_FROM_HANDLE(radv_device, device, _device);
4076         RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
4077         int ret;
4078         uint32_t syncobj_handle;
4079
4080         if (fence->temp_syncobj)
4081                 syncobj_handle = fence->temp_syncobj;
4082         else
4083                 syncobj_handle = fence->syncobj;
4084
4085         switch(pGetFdInfo->handleType) {
4086         case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4087                 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4088                 break;
4089         case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4090                 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4091                 if (!ret) {
4092                         if (fence->temp_syncobj) {
4093                                 close (fence->temp_syncobj);
4094                                 fence->temp_syncobj = 0;
4095                         } else {
4096                                 device->ws->reset_syncobj(device->ws, syncobj_handle);
4097                         }
4098                 }
4099                 break;
4100         default:
4101                 unreachable("Unhandled fence handle type");
4102         }
4103
4104         if (ret)
4105                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4106         return VK_SUCCESS;
4107 }
4108
4109 void radv_GetPhysicalDeviceExternalFencePropertiesKHR(
4110         VkPhysicalDevice                            physicalDevice,
4111         const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
4112         VkExternalFencePropertiesKHR*           pExternalFenceProperties)
4113 {
4114         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4115
4116         if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4117             (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || 
4118              pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4119                 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4120                 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4121                 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
4122                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4123         } else {
4124                 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
4125                 pExternalFenceProperties->compatibleHandleTypes = 0;
4126                 pExternalFenceProperties->externalFenceFeatures = 0;
4127         }
4128 }
4129
4130 VkResult
4131 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
4132                                  const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
4133                                  const VkAllocationCallbacks* pAllocator,
4134                                  VkDebugReportCallbackEXT* pCallback)
4135 {
4136         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4137         return vk_create_debug_report_callback(&instance->debug_report_callbacks,
4138                                                pCreateInfo, pAllocator, &instance->alloc,
4139                                                pCallback);
4140 }
4141
4142 void
4143 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
4144                                   VkDebugReportCallbackEXT _callback,
4145                                   const VkAllocationCallbacks* pAllocator)
4146 {
4147         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4148         vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
4149                                          _callback, pAllocator, &instance->alloc);
4150 }
4151
4152 void
4153 radv_DebugReportMessageEXT(VkInstance _instance,
4154                           VkDebugReportFlagsEXT flags,
4155                           VkDebugReportObjectTypeEXT objectType,
4156                           uint64_t object,
4157                           size_t location,
4158                           int32_t messageCode,
4159                           const char* pLayerPrefix,
4160                           const char* pMessage)
4161 {
4162         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4163         vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
4164                         object, location, messageCode, pLayerPrefix, pMessage);
4165 }