OSDN Git Service

radv: Expose that we don't support any VK_KHR_16_bit_storage parts.
[android-x86/external-mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_shader.h"
35 #include "radv_cs.h"
36 #include "util/disk_cache.h"
37 #include "util/strtod.h"
38 #include "vk_util.h"
39 #include <xf86drm.h>
40 #include <amdgpu.h>
41 #include <amdgpu_drm.h>
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "gfx9d.h"
47 #include "util/debug.h"
48
49 static int
50 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
51 {
52         uint32_t mesa_timestamp, llvm_timestamp;
53         uint16_t f = family;
54         memset(uuid, 0, VK_UUID_SIZE);
55         if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
56             !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
57                 return -1;
58
59         memcpy(uuid, &mesa_timestamp, 4);
60         memcpy((char*)uuid + 4, &llvm_timestamp, 4);
61         memcpy((char*)uuid + 8, &f, 2);
62         snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
63         return 0;
64 }
65
66 static void
67 radv_get_driver_uuid(void *uuid)
68 {
69         ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
70 }
71
72 static void
73 radv_get_device_uuid(struct radeon_info *info, void *uuid)
74 {
75         ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
76 }
77
78 static void
79 radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
80 {
81         const char *chip_string;
82         char llvm_string[32] = {};
83
84         switch (family) {
85         case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
86         case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
87         case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
88         case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
89         case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
90         case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
91         case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
92         case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
93         case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
94         case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
95         case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
96         case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
97         case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
98         case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
99         case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
100         case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
101         case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
102         case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
103         case CHIP_VEGA10: chip_string = "AMD RADV VEGA"; break;
104         case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
105         default: chip_string = "AMD RADV unknown"; break;
106         }
107
108         if (HAVE_LLVM > 0) {
109                 snprintf(llvm_string, sizeof(llvm_string),
110                          " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
111                          HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
112         }
113
114         snprintf(name, name_len, "%s%s", chip_string, llvm_string);
115 }
116
117 static void
118 radv_physical_device_init_mem_types(struct radv_physical_device *device)
119 {
120         STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
121         uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
122                                           device->rad_info.vram_vis_size);
123
124         int vram_index = -1, visible_vram_index = -1, gart_index = -1;
125         device->memory_properties.memoryHeapCount = 0;
126         if (device->rad_info.vram_size - visible_vram_size > 0) {
127                 vram_index = device->memory_properties.memoryHeapCount++;
128                 device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
129                         .size = device->rad_info.vram_size - visible_vram_size,
130                         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
131                 };
132         }
133         if (visible_vram_size) {
134                 visible_vram_index = device->memory_properties.memoryHeapCount++;
135                 device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
136                         .size = visible_vram_size,
137                         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
138                 };
139         }
140         if (device->rad_info.gart_size > 0) {
141                 gart_index = device->memory_properties.memoryHeapCount++;
142                 device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
143                         .size = device->rad_info.gart_size,
144                         .flags = 0,
145                 };
146         }
147
148         STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
149         unsigned type_count = 0;
150         if (vram_index >= 0) {
151                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
152                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
153                         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
154                         .heapIndex = vram_index,
155                 };
156         }
157         if (gart_index >= 0) {
158                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
159                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
160                         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
161                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
162                         .heapIndex = gart_index,
163                 };
164         }
165         if (visible_vram_index >= 0) {
166                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
167                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
168                         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
169                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
170                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
171                         .heapIndex = visible_vram_index,
172                 };
173         }
174         if (gart_index >= 0) {
175                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
176                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
177                         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
178                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
179                         VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
180                         .heapIndex = gart_index,
181                 };
182         }
183         device->memory_properties.memoryTypeCount = type_count;
184 }
185
186 static void
187 radv_handle_env_var_force_family(struct radv_physical_device *device)
188 {
189         const char *family = getenv("RADV_FORCE_FAMILY");
190         unsigned i;
191
192         if (!family)
193                 return;
194
195         for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
196                 if (!strcmp(family, ac_get_llvm_processor_name(i))) {
197                         /* Override family and chip_class. */
198                         device->rad_info.family = i;
199
200                         if (i >= CHIP_VEGA10)
201                                 device->rad_info.chip_class = GFX9;
202                         else if (i >= CHIP_TONGA)
203                                 device->rad_info.chip_class = VI;
204                         else if (i >= CHIP_BONAIRE)
205                                 device->rad_info.chip_class = CIK;
206                         else
207                                 device->rad_info.chip_class = SI;
208
209                         return;
210                 }
211         }
212
213         fprintf(stderr, "radv: Unknown family: %s\n", family);
214         exit(1);
215 }
216
217 static VkResult
218 radv_physical_device_init(struct radv_physical_device *device,
219                           struct radv_instance *instance,
220                           drmDevicePtr drm_device)
221 {
222         const char *path = drm_device->nodes[DRM_NODE_RENDER];
223         VkResult result;
224         drmVersionPtr version;
225         int fd;
226
227         fd = open(path, O_RDWR | O_CLOEXEC);
228         if (fd < 0)
229                 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
230
231         version = drmGetVersion(fd);
232         if (!version) {
233                 close(fd);
234                 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
235                                  "failed to get version %s: %m", path);
236         }
237
238         if (strcmp(version->name, "amdgpu")) {
239                 drmFreeVersion(version);
240                 close(fd);
241                 return VK_ERROR_INCOMPATIBLE_DRIVER;
242         }
243         drmFreeVersion(version);
244
245         device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
246         device->instance = instance;
247         assert(strlen(path) < ARRAY_SIZE(device->path));
248         strncpy(device->path, path, ARRAY_SIZE(device->path));
249
250         device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
251                                                instance->perftest_flags);
252         if (!device->ws) {
253                 result = VK_ERROR_INCOMPATIBLE_DRIVER;
254                 goto fail;
255         }
256
257         device->local_fd = fd;
258         device->ws->query_info(device->ws, &device->rad_info);
259
260         radv_handle_env_var_force_family(device);
261
262         radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
263
264         if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
265                 device->ws->destroy(device->ws);
266                 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
267                                    "cannot generate UUID");
268                 goto fail;
269         }
270
271         /* These flags affect shader compilation. */
272         uint64_t shader_env_flags =
273                 (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
274                 (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
275
276         /* The gpu id is already embeded in the uuid so we just pass "radv"
277          * when creating the cache.
278          */
279         char buf[VK_UUID_SIZE * 2 + 1];
280         disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
281         device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
282
283         fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
284
285         radv_get_driver_uuid(&device->device_uuid);
286         radv_get_device_uuid(&device->rad_info, &device->device_uuid);
287
288         if (device->rad_info.family == CHIP_STONEY ||
289             device->rad_info.chip_class >= GFX9) {
290                 device->has_rbplus = true;
291                 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
292         }
293
294         /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs
295          * on SI.
296          */
297         device->has_clear_state = device->rad_info.chip_class >= CIK;
298
299         device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;
300
301         /* Vega10/Raven need a special workaround for a hardware bug. */
302         device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
303                                   device->rad_info.family == CHIP_RAVEN;
304
305         radv_physical_device_init_mem_types(device);
306         radv_fill_device_extension_table(device, &device->supported_extensions);
307
308         result = radv_init_wsi(device);
309         if (result != VK_SUCCESS) {
310                 device->ws->destroy(device->ws);
311                 goto fail;
312         }
313
314         return VK_SUCCESS;
315
316 fail:
317         close(fd);
318         return result;
319 }
320
321 static void
322 radv_physical_device_finish(struct radv_physical_device *device)
323 {
324         radv_finish_wsi(device);
325         device->ws->destroy(device->ws);
326         disk_cache_destroy(device->disk_cache);
327         close(device->local_fd);
328 }
329
330 static void *
331 default_alloc_func(void *pUserData, size_t size, size_t align,
332                    VkSystemAllocationScope allocationScope)
333 {
334         return malloc(size);
335 }
336
337 static void *
338 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
339                      size_t align, VkSystemAllocationScope allocationScope)
340 {
341         return realloc(pOriginal, size);
342 }
343
/*
 * Default pfnFree callback: releases pMemory with free(). pUserData is
 * not used. Passing a NULL pMemory is harmless because free(NULL) is a
 * no-op.
 */
static void
default_free_func(void *pUserData, void *pMemory)
{
        (void)pUserData;

        free(pMemory);
}
349
350 static const VkAllocationCallbacks default_alloc = {
351         .pUserData = NULL,
352         .pfnAllocation = default_alloc_func,
353         .pfnReallocation = default_realloc_func,
354         .pfnFree = default_free_func,
355 };
356
357 static const struct debug_control radv_debug_options[] = {
358         {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
359         {"nodcc", RADV_DEBUG_NO_DCC},
360         {"shaders", RADV_DEBUG_DUMP_SHADERS},
361         {"nocache", RADV_DEBUG_NO_CACHE},
362         {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
363         {"nohiz", RADV_DEBUG_NO_HIZ},
364         {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
365         {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
366         {"allbos", RADV_DEBUG_ALL_BOS},
367         {"noibs", RADV_DEBUG_NO_IBS},
368         {"spirv", RADV_DEBUG_DUMP_SPIRV},
369         {"vmfaults", RADV_DEBUG_VM_FAULTS},
370         {"zerovram", RADV_DEBUG_ZERO_VRAM},
371         {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
372         {"nosisched", RADV_DEBUG_NO_SISCHED},
373         {"preoptir", RADV_DEBUG_PREOPTIR},
374         {NULL, 0}
375 };
376
377 const char *
378 radv_get_debug_option_name(int id)
379 {
380         assert(id < ARRAY_SIZE(radv_debug_options) - 1);
381         return radv_debug_options[id].string;
382 }
383
384 static const struct debug_control radv_perftest_options[] = {
385         {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
386         {"sisched", RADV_PERFTEST_SISCHED},
387         {"localbos", RADV_PERFTEST_LOCAL_BOS},
388         {"binning", RADV_PERFTEST_BINNING},
389         {NULL, 0}
390 };
391
392 const char *
393 radv_get_perftest_option_name(int id)
394 {
395         assert(id < ARRAY_SIZE(radv_debug_options) - 1);
396         return radv_perftest_options[id].string;
397 }
398
399 static void
400 radv_handle_per_app_options(struct radv_instance *instance,
401                             const VkApplicationInfo *info)
402 {
403         const char *name = info ? info->pApplicationName : NULL;
404
405         if (!name)
406                 return;
407
408         if (!strcmp(name, "Talos - Linux - 32bit") ||
409             !strcmp(name, "Talos - Linux - 64bit")) {
410                 /* Force enable LLVM sisched for Talos because it looks safe
411                  * and it gives few more FPS.
412                  */
413                 instance->perftest_flags |= RADV_PERFTEST_SISCHED;
414         }
415 }
416
417 static int radv_get_instance_extension_index(const char *name)
418 {
419         for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
420                 if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
421                         return i;
422         }
423         return -1;
424 }
425
426
427 VkResult radv_CreateInstance(
428         const VkInstanceCreateInfo*                 pCreateInfo,
429         const VkAllocationCallbacks*                pAllocator,
430         VkInstance*                                 pInstance)
431 {
432         struct radv_instance *instance;
433         VkResult result;
434
435         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
436
437         uint32_t client_version;
438         if (pCreateInfo->pApplicationInfo &&
439             pCreateInfo->pApplicationInfo->apiVersion != 0) {
440                 client_version = pCreateInfo->pApplicationInfo->apiVersion;
441         } else {
442                 client_version = VK_MAKE_VERSION(1, 0, 0);
443         }
444
445         if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
446             client_version > VK_MAKE_VERSION(1, 1, 0xfff)) {
447                 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
448                                  "Client requested version %d.%d.%d",
449                                  VK_VERSION_MAJOR(client_version),
450                                  VK_VERSION_MINOR(client_version),
451                                  VK_VERSION_PATCH(client_version));
452         }
453
454         instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
455                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
456         if (!instance)
457                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
458
459         instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
460
461         if (pAllocator)
462                 instance->alloc = *pAllocator;
463         else
464                 instance->alloc = default_alloc;
465
466         instance->apiVersion = client_version;
467         instance->physicalDeviceCount = -1;
468
469         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
470                 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
471                 int index = radv_get_instance_extension_index(ext_name);
472
473                 if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
474                         vk_free2(&default_alloc, pAllocator, instance);
475                         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
476                 }
477
478                 instance->enabled_extensions.extensions[index] = true;
479         }
480
481         result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
482         if (result != VK_SUCCESS) {
483                 vk_free2(&default_alloc, pAllocator, instance);
484                 return vk_error(result);
485         }
486
487         _mesa_locale_init();
488
489         VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
490
491         instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
492                                                    radv_debug_options);
493
494         instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
495                                                    radv_perftest_options);
496
497         radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
498
499         if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
500                 /* Disable sisched when the user requests it, this is mostly
501                  * useful when the driver force-enable sisched for the given
502                  * application.
503                  */
504                 instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
505         }
506
507         *pInstance = radv_instance_to_handle(instance);
508
509         return VK_SUCCESS;
510 }
511
512 void radv_DestroyInstance(
513         VkInstance                                  _instance,
514         const VkAllocationCallbacks*                pAllocator)
515 {
516         RADV_FROM_HANDLE(radv_instance, instance, _instance);
517
518         if (!instance)
519                 return;
520
521         for (int i = 0; i < instance->physicalDeviceCount; ++i) {
522                 radv_physical_device_finish(instance->physicalDevices + i);
523         }
524
525         VG(VALGRIND_DESTROY_MEMPOOL(instance));
526
527         _mesa_locale_fini();
528
529         vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
530
531         vk_free(&instance->alloc, instance);
532 }
533
534 static VkResult
535 radv_enumerate_devices(struct radv_instance *instance)
536 {
537         /* TODO: Check for more devices ? */
538         drmDevicePtr devices[8];
539         VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
540         int max_devices;
541
542         instance->physicalDeviceCount = 0;
543
544         max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
545         if (max_devices < 1)
546                 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
547
548         for (unsigned i = 0; i < (unsigned)max_devices; i++) {
549                 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
550                     devices[i]->bustype == DRM_BUS_PCI &&
551                     devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
552
553                         result = radv_physical_device_init(instance->physicalDevices +
554                                                            instance->physicalDeviceCount,
555                                                            instance,
556                                                            devices[i]);
557                         if (result == VK_SUCCESS)
558                                 ++instance->physicalDeviceCount;
559                         else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
560                                 break;
561                 }
562         }
563         drmFreeDevices(devices, max_devices);
564
565         return result;
566 }
567
568 VkResult radv_EnumeratePhysicalDevices(
569         VkInstance                                  _instance,
570         uint32_t*                                   pPhysicalDeviceCount,
571         VkPhysicalDevice*                           pPhysicalDevices)
572 {
573         RADV_FROM_HANDLE(radv_instance, instance, _instance);
574         VkResult result;
575
576         if (instance->physicalDeviceCount < 0) {
577                 result = radv_enumerate_devices(instance);
578                 if (result != VK_SUCCESS &&
579                     result != VK_ERROR_INCOMPATIBLE_DRIVER)
580                         return result;
581         }
582
583         if (!pPhysicalDevices) {
584                 *pPhysicalDeviceCount = instance->physicalDeviceCount;
585         } else {
586                 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
587                 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
588                         pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
589         }
590
591         return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
592                                                                      : VK_SUCCESS;
593 }
594
595 VkResult radv_EnumeratePhysicalDeviceGroups(
596     VkInstance                                  _instance,
597     uint32_t*                                   pPhysicalDeviceGroupCount,
598     VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
599 {
600         RADV_FROM_HANDLE(radv_instance, instance, _instance);
601         VkResult result;
602
603         if (instance->physicalDeviceCount < 0) {
604                 result = radv_enumerate_devices(instance);
605                 if (result != VK_SUCCESS &&
606                     result != VK_ERROR_INCOMPATIBLE_DRIVER)
607                         return result;
608         }
609
610         if (!pPhysicalDeviceGroupProperties) {
611                 *pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
612         } else {
613                 *pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
614                 for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
615                         pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
616                         pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
617                         pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
618                 }
619         }
620         return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
621                                                                           : VK_SUCCESS;
622 }
623
624 void radv_GetPhysicalDeviceFeatures(
625         VkPhysicalDevice                            physicalDevice,
626         VkPhysicalDeviceFeatures*                   pFeatures)
627 {
628         memset(pFeatures, 0, sizeof(*pFeatures));
629
630         *pFeatures = (VkPhysicalDeviceFeatures) {
631                 .robustBufferAccess                       = true,
632                 .fullDrawIndexUint32                      = true,
633                 .imageCubeArray                           = true,
634                 .independentBlend                         = true,
635                 .geometryShader                           = true,
636                 .tessellationShader                       = true,
637                 .sampleRateShading                        = true,
638                 .dualSrcBlend                             = true,
639                 .logicOp                                  = true,
640                 .multiDrawIndirect                        = true,
641                 .drawIndirectFirstInstance                = true,
642                 .depthClamp                               = true,
643                 .depthBiasClamp                           = true,
644                 .fillModeNonSolid                         = true,
645                 .depthBounds                              = true,
646                 .wideLines                                = true,
647                 .largePoints                              = true,
648                 .alphaToOne                               = true,
649                 .multiViewport                            = true,
650                 .samplerAnisotropy                        = true,
651                 .textureCompressionETC2                   = false,
652                 .textureCompressionASTC_LDR               = false,
653                 .textureCompressionBC                     = true,
654                 .occlusionQueryPrecise                    = true,
655                 .pipelineStatisticsQuery                  = true,
656                 .vertexPipelineStoresAndAtomics           = true,
657                 .fragmentStoresAndAtomics                 = true,
658                 .shaderTessellationAndGeometryPointSize   = true,
659                 .shaderImageGatherExtended                = true,
660                 .shaderStorageImageExtendedFormats        = true,
661                 .shaderStorageImageMultisample            = false,
662                 .shaderUniformBufferArrayDynamicIndexing  = true,
663                 .shaderSampledImageArrayDynamicIndexing   = true,
664                 .shaderStorageBufferArrayDynamicIndexing  = true,
665                 .shaderStorageImageArrayDynamicIndexing   = true,
666                 .shaderStorageImageReadWithoutFormat      = true,
667                 .shaderStorageImageWriteWithoutFormat     = true,
668                 .shaderClipDistance                       = true,
669                 .shaderCullDistance                       = true,
670                 .shaderFloat64                            = true,
671                 .shaderInt64                              = true,
672                 .shaderInt16                              = false,
673                 .sparseBinding                            = true,
674                 .variableMultisampleRate                  = true,
675                 .inheritedQueries                         = true,
676         };
677 }
678
679 void radv_GetPhysicalDeviceFeatures2(
680         VkPhysicalDevice                            physicalDevice,
681         VkPhysicalDeviceFeatures2KHR               *pFeatures)
682 {
683         vk_foreach_struct(ext, pFeatures->pNext) {
684                 switch (ext->sType) {
685                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
686                         VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
687                         features->variablePointersStorageBuffer = true;
688                         features->variablePointers = false;
689                         break;
690                 }
691                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: {
692                         VkPhysicalDeviceMultiviewFeaturesKHR *features = (VkPhysicalDeviceMultiviewFeaturesKHR*)ext;
693                         features->multiview = true;
694                         features->multiviewGeometryShader = true;
695                         features->multiviewTessellationShader = true;
696                         break;
697                 }
698                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: {
699                         VkPhysicalDeviceShaderDrawParameterFeatures *features =
700                             (VkPhysicalDeviceShaderDrawParameterFeatures*)ext;
701                         features->shaderDrawParameters = true;
702                         break;
703                 }
704                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
705                         VkPhysicalDeviceProtectedMemoryFeatures *features =
706                             (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
707                         features->protectedMemory = false;
708                         break;
709                 }
710                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
711                         VkPhysicalDevice16BitStorageFeatures *features =
712                             (VkPhysicalDevice16BitStorageFeatures*)ext;
713                         features->storageBuffer16BitAccess = false;
714                         features->uniformAndStorageBuffer16BitAccess = false;
715                         features->storagePushConstant16 = false;
716                         features->storageInputOutput16 = false;
717                         break;
718                 }
719                 default:
720                         break;
721                 }
722         }
723         return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
724 }
725
726 void radv_GetPhysicalDeviceProperties(
727         VkPhysicalDevice                            physicalDevice,
728         VkPhysicalDeviceProperties*                 pProperties)
729 {
730         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
731         VkSampleCountFlags sample_counts = 0xf;
732
733         /* make sure that the entire descriptor set is addressable with a signed
734          * 32-bit int. So the sum of all limits scaled by descriptor size has to
735          * be at most 2 GiB. the combined image & samples object count as one of
736          * both. This limit is for the pipeline layout, not for the set layout, but
737          * there is no set limit, so we just set a pipeline limit. I don't think
738          * any app is going to hit this soon. */
739         size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
740                   (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
741                    32 /* storage buffer, 32 due to potential space wasted on alignment */ +
742                    32 /* sampler, largest when combined with image */ +
743                    64 /* sampled image */ +
744                    64 /* storage image */);
745
746         VkPhysicalDeviceLimits limits = {
747                 .maxImageDimension1D                      = (1 << 14),
748                 .maxImageDimension2D                      = (1 << 14),
749                 .maxImageDimension3D                      = (1 << 11),
750                 .maxImageDimensionCube                    = (1 << 14),
751                 .maxImageArrayLayers                      = (1 << 11),
752                 .maxTexelBufferElements                   = 128 * 1024 * 1024,
753                 .maxUniformBufferRange                    = UINT32_MAX,
754                 .maxStorageBufferRange                    = UINT32_MAX,
755                 .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
756                 .maxMemoryAllocationCount                 = UINT32_MAX,
757                 .maxSamplerAllocationCount                = 64 * 1024,
758                 .bufferImageGranularity                   = 64, /* A cache line */
759                 .sparseAddressSpaceSize                   = 0xffffffffu, /* buffer max size */
760                 .maxBoundDescriptorSets                   = MAX_SETS,
761                 .maxPerStageDescriptorSamplers            = max_descriptor_set_size,
762                 .maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
763                 .maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
764                 .maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
765                 .maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
766                 .maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
767                 .maxPerStageResources                     = max_descriptor_set_size,
768                 .maxDescriptorSetSamplers                 = max_descriptor_set_size,
769                 .maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
770                 .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
771                 .maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
772                 .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
773                 .maxDescriptorSetSampledImages            = max_descriptor_set_size,
774                 .maxDescriptorSetStorageImages            = max_descriptor_set_size,
775                 .maxDescriptorSetInputAttachments         = max_descriptor_set_size,
776                 .maxVertexInputAttributes                 = 32,
777                 .maxVertexInputBindings                   = 32,
778                 .maxVertexInputAttributeOffset            = 2047,
779                 .maxVertexInputBindingStride              = 2048,
780                 .maxVertexOutputComponents                = 128,
781                 .maxTessellationGenerationLevel           = 64,
782                 .maxTessellationPatchSize                 = 32,
783                 .maxTessellationControlPerVertexInputComponents = 128,
784                 .maxTessellationControlPerVertexOutputComponents = 128,
785                 .maxTessellationControlPerPatchOutputComponents = 120,
786                 .maxTessellationControlTotalOutputComponents = 4096,
787                 .maxTessellationEvaluationInputComponents = 128,
788                 .maxTessellationEvaluationOutputComponents = 128,
789                 .maxGeometryShaderInvocations             = 127,
790                 .maxGeometryInputComponents               = 64,
791                 .maxGeometryOutputComponents              = 128,
792                 .maxGeometryOutputVertices                = 256,
793                 .maxGeometryTotalOutputComponents         = 1024,
794                 .maxFragmentInputComponents               = 128,
795                 .maxFragmentOutputAttachments             = 8,
796                 .maxFragmentDualSrcAttachments            = 1,
797                 .maxFragmentCombinedOutputResources       = 8,
798                 .maxComputeSharedMemorySize               = 32768,
799                 .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
800                 .maxComputeWorkGroupInvocations           = 2048,
801                 .maxComputeWorkGroupSize = {
802                         2048,
803                         2048,
804                         2048
805                 },
806                 .subPixelPrecisionBits                    = 4 /* FIXME */,
807                 .subTexelPrecisionBits                    = 4 /* FIXME */,
808                 .mipmapPrecisionBits                      = 4 /* FIXME */,
809                 .maxDrawIndexedIndexValue                 = UINT32_MAX,
810                 .maxDrawIndirectCount                     = UINT32_MAX,
811                 .maxSamplerLodBias                        = 16,
812                 .maxSamplerAnisotropy                     = 16,
813                 .maxViewports                             = MAX_VIEWPORTS,
814                 .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
815                 .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
816                 .viewportSubPixelBits                     = 13, /* We take a float? */
817                 .minMemoryMapAlignment                    = 4096, /* A page */
818                 .minTexelBufferOffsetAlignment            = 1,
819                 .minUniformBufferOffsetAlignment          = 4,
820                 .minStorageBufferOffsetAlignment          = 4,
821                 .minTexelOffset                           = -32,
822                 .maxTexelOffset                           = 31,
823                 .minTexelGatherOffset                     = -32,
824                 .maxTexelGatherOffset                     = 31,
825                 .minInterpolationOffset                   = -2,
826                 .maxInterpolationOffset                   = 2,
827                 .subPixelInterpolationOffsetBits          = 8,
828                 .maxFramebufferWidth                      = (1 << 14),
829                 .maxFramebufferHeight                     = (1 << 14),
830                 .maxFramebufferLayers                     = (1 << 10),
831                 .framebufferColorSampleCounts             = sample_counts,
832                 .framebufferDepthSampleCounts             = sample_counts,
833                 .framebufferStencilSampleCounts           = sample_counts,
834                 .framebufferNoAttachmentsSampleCounts     = sample_counts,
835                 .maxColorAttachments                      = MAX_RTS,
836                 .sampledImageColorSampleCounts            = sample_counts,
837                 .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
838                 .sampledImageDepthSampleCounts            = sample_counts,
839                 .sampledImageStencilSampleCounts          = sample_counts,
840                 .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
841                 .maxSampleMaskWords                       = 1,
842                 .timestampComputeAndGraphics              = true,
843                 .timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
844                 .maxClipDistances                         = 8,
845                 .maxCullDistances                         = 8,
846                 .maxCombinedClipAndCullDistances          = 8,
847                 .discreteQueuePriorities                  = 1,
848                 .pointSizeRange                           = { 0.125, 255.875 },
849                 .lineWidthRange                           = { 0.0, 7.9921875 },
850                 .pointSizeGranularity                     = (1.0 / 8.0),
851                 .lineWidthGranularity                     = (1.0 / 128.0),
852                 .strictLines                              = false, /* FINISHME */
853                 .standardSampleLocations                  = true,
854                 .optimalBufferCopyOffsetAlignment         = 128,
855                 .optimalBufferCopyRowPitchAlignment       = 128,
856                 .nonCoherentAtomSize                      = 64,
857         };
858
859         *pProperties = (VkPhysicalDeviceProperties) {
860                 .apiVersion = radv_physical_device_api_version(pdevice),
861                 .driverVersion = vk_get_driver_version(),
862                 .vendorID = ATI_VENDOR_ID,
863                 .deviceID = pdevice->rad_info.pci_id,
864                 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
865                 .limits = limits,
866                 .sparseProperties = {0},
867         };
868
869         strcpy(pProperties->deviceName, pdevice->name);
870         memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
871 }
872
873 void radv_GetPhysicalDeviceProperties2(
874         VkPhysicalDevice                            physicalDevice,
875         VkPhysicalDeviceProperties2KHR             *pProperties)
876 {
877         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
878         radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
879
880         vk_foreach_struct(ext, pProperties->pNext) {
881                 switch (ext->sType) {
882                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
883                         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
884                                 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
885                         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
886                         break;
887                 }
888                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
889                         VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
890                         memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
891                         memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
892                         properties->deviceLUIDValid = false;
893                         break;
894                 }
895                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR: {
896                         VkPhysicalDeviceMultiviewPropertiesKHR *properties = (VkPhysicalDeviceMultiviewPropertiesKHR*)ext;
897                         properties->maxMultiviewViewCount = MAX_VIEWS;
898                         properties->maxMultiviewInstanceIndex = INT_MAX;
899                         break;
900                 }
901                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
902                         VkPhysicalDevicePointClippingPropertiesKHR *properties =
903                             (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
904                         properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
905                         break;
906                 }
907                 case  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
908                         VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
909                             (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
910                         properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
911                         break;
912                 }
913                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
914                         VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
915                             (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
916                         properties->minImportedHostPointerAlignment = 4096;
917                         break;
918                 }
919                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
920                         VkPhysicalDeviceSubgroupProperties *properties =
921                             (VkPhysicalDeviceSubgroupProperties*)ext;
922                         properties->subgroupSize = 64;
923                         properties->supportedStages = VK_SHADER_STAGE_ALL;
924                         properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT;
925                         properties->quadOperationsInAllStages = false;
926                         break;
927                 }
928                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
929                         VkPhysicalDeviceMaintenance3Properties *properties =
930                             (VkPhysicalDeviceMaintenance3Properties*)ext;
931                         /* Make sure everything is addressable by a signed 32-bit int, and
932                          * our largest descriptors are 96 bytes. */
933                         properties->maxPerSetDescriptors = (1ull << 31) / 96;
934                         /* Our buffer size fields allow only this much */
935                         properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
936                         break;
937                 }
938                 default:
939                         break;
940                 }
941         }
942 }
943
944 static void radv_get_physical_device_queue_family_properties(
945         struct radv_physical_device*                pdevice,
946         uint32_t*                                   pCount,
947         VkQueueFamilyProperties**                    pQueueFamilyProperties)
948 {
949         int num_queue_families = 1;
950         int idx;
951         if (pdevice->rad_info.num_compute_rings > 0 &&
952             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
953                 num_queue_families++;
954
955         if (pQueueFamilyProperties == NULL) {
956                 *pCount = num_queue_families;
957                 return;
958         }
959
960         if (!*pCount)
961                 return;
962
963         idx = 0;
964         if (*pCount >= 1) {
965                 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
966                         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
967                                       VK_QUEUE_COMPUTE_BIT |
968                                       VK_QUEUE_TRANSFER_BIT |
969                                       VK_QUEUE_SPARSE_BINDING_BIT,
970                         .queueCount = 1,
971                         .timestampValidBits = 64,
972                         .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
973                 };
974                 idx++;
975         }
976
977         if (pdevice->rad_info.num_compute_rings > 0 &&
978             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
979                 if (*pCount > idx) {
980                         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
981                                 .queueFlags = VK_QUEUE_COMPUTE_BIT |
982                                               VK_QUEUE_TRANSFER_BIT |
983                                               VK_QUEUE_SPARSE_BINDING_BIT,
984                                 .queueCount = pdevice->rad_info.num_compute_rings,
985                                 .timestampValidBits = 64,
986                                 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
987                         };
988                         idx++;
989                 }
990         }
991         *pCount = idx;
992 }
993
994 void radv_GetPhysicalDeviceQueueFamilyProperties(
995         VkPhysicalDevice                            physicalDevice,
996         uint32_t*                                   pCount,
997         VkQueueFamilyProperties*                    pQueueFamilyProperties)
998 {
999         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1000         if (!pQueueFamilyProperties) {
1001                 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1002                 return;
1003         }
1004         VkQueueFamilyProperties *properties[] = {
1005                 pQueueFamilyProperties + 0,
1006                 pQueueFamilyProperties + 1,
1007                 pQueueFamilyProperties + 2,
1008         };
1009         radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1010         assert(*pCount <= 3);
1011 }
1012
1013 void radv_GetPhysicalDeviceQueueFamilyProperties2(
1014         VkPhysicalDevice                            physicalDevice,
1015         uint32_t*                                   pCount,
1016         VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
1017 {
1018         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1019         if (!pQueueFamilyProperties) {
1020                 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1021                 return;
1022         }
1023         VkQueueFamilyProperties *properties[] = {
1024                 &pQueueFamilyProperties[0].queueFamilyProperties,
1025                 &pQueueFamilyProperties[1].queueFamilyProperties,
1026                 &pQueueFamilyProperties[2].queueFamilyProperties,
1027         };
1028         radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1029         assert(*pCount <= 3);
1030 }
1031
1032 void radv_GetPhysicalDeviceMemoryProperties(
1033         VkPhysicalDevice                            physicalDevice,
1034         VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
1035 {
1036         RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1037
1038         *pMemoryProperties = physical_device->memory_properties;
1039 }
1040
1041 void radv_GetPhysicalDeviceMemoryProperties2(
1042         VkPhysicalDevice                            physicalDevice,
1043         VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
1044 {
1045         return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1046                                                       &pMemoryProperties->memoryProperties);
1047 }
1048
1049 VkResult radv_GetMemoryHostPointerPropertiesEXT(
1050         VkDevice                                    _device,
1051         VkExternalMemoryHandleTypeFlagBitsKHR       handleType,
1052         const void                                 *pHostPointer,
1053         VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
1054 {
1055         RADV_FROM_HANDLE(radv_device, device, _device);
1056
1057         switch (handleType)
1058         {
1059         case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
1060                 const struct radv_physical_device *physical_device = device->physical_device;
1061                 uint32_t memoryTypeBits = 0;
1062                 for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
1063                         if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
1064                                 memoryTypeBits = (1 << i);
1065                                 break;
1066                         }
1067                 }
1068                 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
1069                 return VK_SUCCESS;
1070         }
1071         default:
1072                 return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
1073         }
1074 }
1075
1076 static enum radeon_ctx_priority
1077 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
1078 {
1079         /* Default to MEDIUM when a specific global priority isn't requested */
1080         if (!pObj)
1081                 return RADEON_CTX_PRIORITY_MEDIUM;
1082
1083         switch(pObj->globalPriority) {
1084         case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
1085                 return RADEON_CTX_PRIORITY_REALTIME;
1086         case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
1087                 return RADEON_CTX_PRIORITY_HIGH;
1088         case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
1089                 return RADEON_CTX_PRIORITY_MEDIUM;
1090         case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
1091                 return RADEON_CTX_PRIORITY_LOW;
1092         default:
1093                 unreachable("Illegal global priority value");
1094                 return RADEON_CTX_PRIORITY_INVALID;
1095         }
1096 }
1097
1098 static int
1099 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1100                 uint32_t queue_family_index, int idx,
1101                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
1102 {
1103         queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1104         queue->device = device;
1105         queue->queue_family_index = queue_family_index;
1106         queue->queue_idx = idx;
1107         queue->priority = radv_get_queue_global_priority(global_priority);
1108
1109         queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
1110         if (!queue->hw_ctx)
1111                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1112
1113         return VK_SUCCESS;
1114 }
1115
1116 static void
1117 radv_queue_finish(struct radv_queue *queue)
1118 {
1119         if (queue->hw_ctx)
1120                 queue->device->ws->ctx_destroy(queue->hw_ctx);
1121
1122         if (queue->initial_full_flush_preamble_cs)
1123                 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1124         if (queue->initial_preamble_cs)
1125                 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1126         if (queue->continue_preamble_cs)
1127                 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1128         if (queue->descriptor_bo)
1129                 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1130         if (queue->scratch_bo)
1131                 queue->device->ws->buffer_destroy(queue->scratch_bo);
1132         if (queue->esgs_ring_bo)
1133                 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1134         if (queue->gsvs_ring_bo)
1135                 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1136         if (queue->tess_rings_bo)
1137                 queue->device->ws->buffer_destroy(queue->tess_rings_bo);
1138         if (queue->compute_scratch_bo)
1139                 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1140 }
1141
1142 static void
1143 radv_device_init_gs_info(struct radv_device *device)
1144 {
1145         switch (device->physical_device->rad_info.family) {
1146         case CHIP_OLAND:
1147         case CHIP_HAINAN:
1148         case CHIP_KAVERI:
1149         case CHIP_KABINI:
1150         case CHIP_MULLINS:
1151         case CHIP_ICELAND:
1152         case CHIP_CARRIZO:
1153         case CHIP_STONEY:
1154                 device->gs_table_depth = 16;
1155                 return;
1156         case CHIP_TAHITI:
1157         case CHIP_PITCAIRN:
1158         case CHIP_VERDE:
1159         case CHIP_BONAIRE:
1160         case CHIP_HAWAII:
1161         case CHIP_TONGA:
1162         case CHIP_FIJI:
1163         case CHIP_POLARIS10:
1164         case CHIP_POLARIS11:
1165         case CHIP_POLARIS12:
1166         case CHIP_VEGA10:
1167         case CHIP_RAVEN:
1168                 device->gs_table_depth = 32;
1169                 return;
1170         default:
1171                 unreachable("unknown GPU");
1172         }
1173 }
1174
1175 static int radv_get_device_extension_index(const char *name)
1176 {
1177         for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
1178                 if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
1179                         return i;
1180         }
1181         return -1;
1182 }
1183
1184 VkResult radv_CreateDevice(
1185         VkPhysicalDevice                            physicalDevice,
1186         const VkDeviceCreateInfo*                   pCreateInfo,
1187         const VkAllocationCallbacks*                pAllocator,
1188         VkDevice*                                   pDevice)
1189 {
1190         RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1191         VkResult result;
1192         struct radv_device *device;
1193
1194         bool keep_shader_info = false;
1195
1196         /* Check enabled features */
1197         if (pCreateInfo->pEnabledFeatures) {
1198                 VkPhysicalDeviceFeatures supported_features;
1199                 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1200                 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1201                 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1202                 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1203                 for (uint32_t i = 0; i < num_features; i++) {
1204                         if (enabled_feature[i] && !supported_feature[i])
1205                                 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
1206                 }
1207         }
1208
1209         device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1210                             sizeof(*device), 8,
1211                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1212         if (!device)
1213                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1214
1215         device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1216         device->instance = physical_device->instance;
1217         device->physical_device = physical_device;
1218
1219         device->ws = physical_device->ws;
1220         if (pAllocator)
1221                 device->alloc = *pAllocator;
1222         else
1223                 device->alloc = physical_device->instance->alloc;
1224
1225         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1226                 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1227                 int index = radv_get_device_extension_index(ext_name);
1228                 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1229                         vk_free(&device->alloc, device);
1230                         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1231                 }
1232
1233                 device->enabled_extensions.extensions[index] = true;
1234         }
1235
1236         keep_shader_info = device->enabled_extensions.AMD_shader_info;
1237
1238         mtx_init(&device->shader_slab_mutex, mtx_plain);
1239         list_inithead(&device->shader_slabs);
1240
1241         for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1242                 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1243                 uint32_t qfi = queue_create->queueFamilyIndex;
1244                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1245                         vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1246
1247                 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1248
1249                 device->queues[qfi] = vk_alloc(&device->alloc,
1250                                                queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1251                 if (!device->queues[qfi]) {
1252                         result = VK_ERROR_OUT_OF_HOST_MEMORY;
1253                         goto fail;
1254                 }
1255
1256                 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1257
1258                 device->queue_count[qfi] = queue_create->queueCount;
1259
1260                 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1261                         result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
1262                         if (result != VK_SUCCESS)
1263                                 goto fail;
1264                 }
1265         }
1266
1267         device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1268                               (device->instance->perftest_flags & RADV_PERFTEST_BINNING);
1269
1270         /* Disabled and not implemented for now. */
1271         device->dfsm_allowed = device->pbb_allowed && false;
1272
1273 #ifdef ANDROID
1274         device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1275 #endif
1276
1277         device->llvm_supports_spill = true;
1278
1279         /* The maximum number of scratch waves. Scratch space isn't divided
1280          * evenly between CUs. The number is only a function of the number of CUs.
1281          * We can decrease the constant to decrease the scratch buffer size.
1282          *
1283          * sctx->scratch_waves must be >= the maximum posible size of
1284          * 1 threadgroup, so that the hw doesn't hang from being unable
1285          * to start any.
1286          *
1287          * The recommended value is 4 per CU at most. Higher numbers don't
1288          * bring much benefit, but they still occupy chip resources (think
1289          * async compute). I've seen ~2% performance difference between 4 and 32.
1290          */
1291         uint32_t max_threads_per_block = 2048;
1292         device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1293                                      max_threads_per_block / 64);
1294
1295         device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
1296
1297         if (device->physical_device->rad_info.chip_class >= CIK) {
1298                 /* If the KMD allows it (there is a KMD hw register for it),
1299                  * allow launching waves out-of-order.
1300                  */
1301                 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
1302         }
1303
1304         radv_device_init_gs_info(device);
1305
1306         device->tess_offchip_block_dw_size =
1307                 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1308         device->has_distributed_tess =
1309                 device->physical_device->rad_info.chip_class >= VI &&
1310                 device->physical_device->rad_info.max_se >= 2;
1311
1312         if (getenv("RADV_TRACE_FILE")) {
1313                 keep_shader_info = true;
1314
1315                 if (!radv_init_trace(device))
1316                         goto fail;
1317         }
1318
1319         device->keep_shader_info = keep_shader_info;
1320
1321         result = radv_device_init_meta(device);
1322         if (result != VK_SUCCESS)
1323                 goto fail;
1324
1325         radv_device_init_msaa(device);
1326
1327         for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1328                 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1329                 switch (family) {
1330                 case RADV_QUEUE_GENERAL:
1331                         radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1332                         radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1333                         radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1334                         break;
1335                 case RADV_QUEUE_COMPUTE:
1336                         radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1337                         radeon_emit(device->empty_cs[family], 0);
1338                         break;
1339                 }
1340                 device->ws->cs_finalize(device->empty_cs[family]);
1341         }
1342
1343         if (device->physical_device->rad_info.chip_class >= CIK)
1344                 cik_create_gfx_config(device);
1345
1346         VkPipelineCacheCreateInfo ci;
1347         ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1348         ci.pNext = NULL;
1349         ci.flags = 0;
1350         ci.pInitialData = NULL;
1351         ci.initialDataSize = 0;
1352         VkPipelineCache pc;
1353         result = radv_CreatePipelineCache(radv_device_to_handle(device),
1354                                           &ci, NULL, &pc);
1355         if (result != VK_SUCCESS)
1356                 goto fail_meta;
1357
1358         device->mem_cache = radv_pipeline_cache_from_handle(pc);
1359
1360         *pDevice = radv_device_to_handle(device);
1361         return VK_SUCCESS;
1362
1363 fail_meta:
1364         radv_device_finish_meta(device);
1365 fail:
1366         if (device->trace_bo)
1367                 device->ws->buffer_destroy(device->trace_bo);
1368
1369         if (device->gfx_init)
1370                 device->ws->buffer_destroy(device->gfx_init);
1371
1372         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1373                 for (unsigned q = 0; q < device->queue_count[i]; q++)
1374                         radv_queue_finish(&device->queues[i][q]);
1375                 if (device->queue_count[i])
1376                         vk_free(&device->alloc, device->queues[i]);
1377         }
1378
1379         vk_free(&device->alloc, device);
1380         return result;
1381 }
1382
1383 void radv_DestroyDevice(
1384         VkDevice                                    _device,
1385         const VkAllocationCallbacks*                pAllocator)
1386 {
1387         RADV_FROM_HANDLE(radv_device, device, _device);
1388
1389         if (!device)
1390                 return;
1391
1392         if (device->trace_bo)
1393                 device->ws->buffer_destroy(device->trace_bo);
1394
1395         if (device->gfx_init)
1396                 device->ws->buffer_destroy(device->gfx_init);
1397
1398         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1399                 for (unsigned q = 0; q < device->queue_count[i]; q++)
1400                         radv_queue_finish(&device->queues[i][q]);
1401                 if (device->queue_count[i])
1402                         vk_free(&device->alloc, device->queues[i]);
1403                 if (device->empty_cs[i])
1404                         device->ws->cs_destroy(device->empty_cs[i]);
1405         }
1406         radv_device_finish_meta(device);
1407
1408         VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1409         radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1410
1411         radv_destroy_shader_slabs(device);
1412
1413         vk_free(&device->alloc, device);
1414 }
1415
1416 VkResult radv_EnumerateInstanceLayerProperties(
1417         uint32_t*                                   pPropertyCount,
1418         VkLayerProperties*                          pProperties)
1419 {
1420         if (pProperties == NULL) {
1421                 *pPropertyCount = 0;
1422                 return VK_SUCCESS;
1423         }
1424
1425         /* None supported at this time */
1426         return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1427 }
1428
1429 VkResult radv_EnumerateDeviceLayerProperties(
1430         VkPhysicalDevice                            physicalDevice,
1431         uint32_t*                                   pPropertyCount,
1432         VkLayerProperties*                          pProperties)
1433 {
1434         if (pProperties == NULL) {
1435                 *pPropertyCount = 0;
1436                 return VK_SUCCESS;
1437         }
1438
1439         /* None supported at this time */
1440         return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1441 }
1442
1443 void radv_GetDeviceQueue2(
1444         VkDevice                                    _device,
1445         const VkDeviceQueueInfo2*                   pQueueInfo,
1446         VkQueue*                                    pQueue)
1447 {
1448         RADV_FROM_HANDLE(radv_device, device, _device);
1449
1450         *pQueue = radv_queue_to_handle(&device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex]);
1451 }
1452
1453 void radv_GetDeviceQueue(
1454         VkDevice                                    _device,
1455         uint32_t                                    queueFamilyIndex,
1456         uint32_t                                    queueIndex,
1457         VkQueue*                                    pQueue)
1458 {
1459         const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
1460                 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
1461                 .queueFamilyIndex = queueFamilyIndex,
1462                 .queueIndex = queueIndex
1463         };
1464
1465         radv_GetDeviceQueue2(_device, &info, pQueue);
1466 }
1467
1468 static void
1469 fill_geom_tess_rings(struct radv_queue *queue,
1470                      uint32_t *map,
1471                      bool add_sample_positions,
1472                      uint32_t esgs_ring_size,
1473                      struct radeon_winsys_bo *esgs_ring_bo,
1474                      uint32_t gsvs_ring_size,
1475                      struct radeon_winsys_bo *gsvs_ring_bo,
1476                      uint32_t tess_factor_ring_size,
1477                      uint32_t tess_offchip_ring_offset,
1478                      uint32_t tess_offchip_ring_size,
1479                      struct radeon_winsys_bo *tess_rings_bo)
1480 {
1481         uint64_t esgs_va = 0, gsvs_va = 0;
1482         uint64_t tess_va = 0, tess_offchip_va = 0;
1483         uint32_t *desc = &map[4];
1484
1485         if (esgs_ring_bo)
1486                 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1487         if (gsvs_ring_bo)
1488                 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1489         if (tess_rings_bo) {
1490                 tess_va = radv_buffer_get_va(tess_rings_bo);
1491                 tess_offchip_va = tess_va + tess_offchip_ring_offset;
1492         }
1493
1494         /* stride 0, num records - size, add tid, swizzle, elsize4,
1495            index stride 64 */
1496         desc[0] = esgs_va;
1497         desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1498                 S_008F04_STRIDE(0) |
1499                 S_008F04_SWIZZLE_ENABLE(true);
1500         desc[2] = esgs_ring_size;
1501         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1502                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1503                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1504                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1505                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1506                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1507                 S_008F0C_ELEMENT_SIZE(1) |
1508                 S_008F0C_INDEX_STRIDE(3) |
1509                 S_008F0C_ADD_TID_ENABLE(true);
1510
1511         desc += 4;
1512         /* GS entry for ES->GS ring */
1513         /* stride 0, num records - size, elsize0,
1514            index stride 0 */
1515         desc[0] = esgs_va;
1516         desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1517                 S_008F04_STRIDE(0) |
1518                 S_008F04_SWIZZLE_ENABLE(false);
1519         desc[2] = esgs_ring_size;
1520         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1521                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1522                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1523                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1524                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1525                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1526                 S_008F0C_ELEMENT_SIZE(0) |
1527                 S_008F0C_INDEX_STRIDE(0) |
1528                 S_008F0C_ADD_TID_ENABLE(false);
1529
1530         desc += 4;
1531         /* VS entry for GS->VS ring */
1532         /* stride 0, num records - size, elsize0,
1533            index stride 0 */
1534         desc[0] = gsvs_va;
1535         desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1536                 S_008F04_STRIDE(0) |
1537                 S_008F04_SWIZZLE_ENABLE(false);
1538         desc[2] = gsvs_ring_size;
1539         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1540                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1541                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1542                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1543                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1544                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1545                 S_008F0C_ELEMENT_SIZE(0) |
1546                 S_008F0C_INDEX_STRIDE(0) |
1547                 S_008F0C_ADD_TID_ENABLE(false);
1548         desc += 4;
1549
1550         /* stride gsvs_itemsize, num records 64
1551            elsize 4, index stride 16 */
1552         /* shader will patch stride and desc[2] */
1553         desc[0] = gsvs_va;
1554         desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1555                 S_008F04_STRIDE(0) |
1556                 S_008F04_SWIZZLE_ENABLE(true);
1557         desc[2] = 0;
1558         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1559                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1560                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1561                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1562                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1563                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1564                 S_008F0C_ELEMENT_SIZE(1) |
1565                 S_008F0C_INDEX_STRIDE(1) |
1566                 S_008F0C_ADD_TID_ENABLE(true);
1567         desc += 4;
1568
1569         desc[0] = tess_va;
1570         desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
1571                 S_008F04_STRIDE(0) |
1572                 S_008F04_SWIZZLE_ENABLE(false);
1573         desc[2] = tess_factor_ring_size;
1574         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1575                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1576                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1577                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1578                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1579                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1580                 S_008F0C_ELEMENT_SIZE(0) |
1581                 S_008F0C_INDEX_STRIDE(0) |
1582                 S_008F0C_ADD_TID_ENABLE(false);
1583         desc += 4;
1584
1585         desc[0] = tess_offchip_va;
1586         desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1587                 S_008F04_STRIDE(0) |
1588                 S_008F04_SWIZZLE_ENABLE(false);
1589         desc[2] = tess_offchip_ring_size;
1590         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1591                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1592                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1593                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1594                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1595                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1596                 S_008F0C_ELEMENT_SIZE(0) |
1597                 S_008F0C_INDEX_STRIDE(0) |
1598                 S_008F0C_ADD_TID_ENABLE(false);
1599         desc += 4;
1600
1601         /* add sample positions after all rings */
1602         memcpy(desc, queue->device->sample_locations_1x, 8);
1603         desc += 2;
1604         memcpy(desc, queue->device->sample_locations_2x, 16);
1605         desc += 4;
1606         memcpy(desc, queue->device->sample_locations_4x, 32);
1607         desc += 8;
1608         memcpy(desc, queue->device->sample_locations_8x, 64);
1609         desc += 16;
1610         memcpy(desc, queue->device->sample_locations_16x, 128);
1611 }
1612
1613 static unsigned
1614 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1615 {
1616         bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1617                 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1618                 device->physical_device->rad_info.family != CHIP_STONEY;
1619         unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1620         unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1621                 device->physical_device->rad_info.max_se;
1622         unsigned offchip_granularity;
1623         unsigned hs_offchip_param;
1624         switch (device->tess_offchip_block_dw_size) {
1625         default:
1626                 assert(0);
1627                 /* fall through */
1628         case 8192:
1629                 offchip_granularity = V_03093C_X_8K_DWORDS;
1630                 break;
1631         case 4096:
1632                 offchip_granularity = V_03093C_X_4K_DWORDS;
1633                 break;
1634         }
1635
1636         switch (device->physical_device->rad_info.chip_class) {
1637         case SI:
1638                 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1639                 break;
1640         case CIK:
1641         case VI:
1642         case GFX9:
1643         default:
1644                 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1645                 break;
1646         }
1647
1648         *max_offchip_buffers_p = max_offchip_buffers;
1649         if (device->physical_device->rad_info.chip_class >= CIK) {
1650                 if (device->physical_device->rad_info.chip_class >= VI)
1651                         --max_offchip_buffers;
1652                 hs_offchip_param =
1653                         S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1654                         S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1655         } else {
1656                 hs_offchip_param =
1657                         S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1658         }
1659         return hs_offchip_param;
1660 }
1661
1662 static VkResult
1663 radv_get_preamble_cs(struct radv_queue *queue,
1664                      uint32_t scratch_size,
1665                      uint32_t compute_scratch_size,
1666                      uint32_t esgs_ring_size,
1667                      uint32_t gsvs_ring_size,
1668                      bool needs_tess_rings,
1669                      bool needs_sample_positions,
1670                      struct radeon_winsys_cs **initial_full_flush_preamble_cs,
1671                      struct radeon_winsys_cs **initial_preamble_cs,
1672                      struct radeon_winsys_cs **continue_preamble_cs)
1673 {
1674         struct radeon_winsys_bo *scratch_bo = NULL;
1675         struct radeon_winsys_bo *descriptor_bo = NULL;
1676         struct radeon_winsys_bo *compute_scratch_bo = NULL;
1677         struct radeon_winsys_bo *esgs_ring_bo = NULL;
1678         struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1679         struct radeon_winsys_bo *tess_rings_bo = NULL;
1680         struct radeon_winsys_cs *dest_cs[3] = {0};
1681         bool add_tess_rings = false, add_sample_positions = false;
1682         unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1683         unsigned max_offchip_buffers;
1684         unsigned hs_offchip_param = 0;
1685         unsigned tess_offchip_ring_offset;
1686         uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
1687         if (!queue->has_tess_rings) {
1688                 if (needs_tess_rings)
1689                         add_tess_rings = true;
1690         }
1691         if (!queue->has_sample_positions) {
1692                 if (needs_sample_positions)
1693                         add_sample_positions = true;
1694         }
1695         tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1696         hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1697                                                      &max_offchip_buffers);
1698         tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
1699         tess_offchip_ring_size = max_offchip_buffers *
1700                 queue->device->tess_offchip_block_dw_size * 4;
1701
1702         if (scratch_size <= queue->scratch_size &&
1703             compute_scratch_size <= queue->compute_scratch_size &&
1704             esgs_ring_size <= queue->esgs_ring_size &&
1705             gsvs_ring_size <= queue->gsvs_ring_size &&
1706             !add_tess_rings && !add_sample_positions &&
1707             queue->initial_preamble_cs) {
1708                 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1709                 *initial_preamble_cs = queue->initial_preamble_cs;
1710                 *continue_preamble_cs = queue->continue_preamble_cs;
1711                 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1712                         *continue_preamble_cs = NULL;
1713                 return VK_SUCCESS;
1714         }
1715
1716         if (scratch_size > queue->scratch_size) {
1717                 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1718                                                               scratch_size,
1719                                                               4096,
1720                                                               RADEON_DOMAIN_VRAM,
1721                                                               ring_bo_flags);
1722                 if (!scratch_bo)
1723                         goto fail;
1724         } else
1725                 scratch_bo = queue->scratch_bo;
1726
1727         if (compute_scratch_size > queue->compute_scratch_size) {
1728                 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1729                                                                       compute_scratch_size,
1730                                                                       4096,
1731                                                                       RADEON_DOMAIN_VRAM,
1732                                                                       ring_bo_flags);
1733                 if (!compute_scratch_bo)
1734                         goto fail;
1735
1736         } else
1737                 compute_scratch_bo = queue->compute_scratch_bo;
1738
1739         if (esgs_ring_size > queue->esgs_ring_size) {
1740                 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1741                                                                 esgs_ring_size,
1742                                                                 4096,
1743                                                                 RADEON_DOMAIN_VRAM,
1744                                                                 ring_bo_flags);
1745                 if (!esgs_ring_bo)
1746                         goto fail;
1747         } else {
1748                 esgs_ring_bo = queue->esgs_ring_bo;
1749                 esgs_ring_size = queue->esgs_ring_size;
1750         }
1751
1752         if (gsvs_ring_size > queue->gsvs_ring_size) {
1753                 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1754                                                                 gsvs_ring_size,
1755                                                                 4096,
1756                                                                 RADEON_DOMAIN_VRAM,
1757                                                                 ring_bo_flags);
1758                 if (!gsvs_ring_bo)
1759                         goto fail;
1760         } else {
1761                 gsvs_ring_bo = queue->gsvs_ring_bo;
1762                 gsvs_ring_size = queue->gsvs_ring_size;
1763         }
1764
1765         if (add_tess_rings) {
1766                 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
1767                                                                  tess_offchip_ring_offset + tess_offchip_ring_size,
1768                                                                  256,
1769                                                                  RADEON_DOMAIN_VRAM,
1770                                                                  ring_bo_flags);
1771                 if (!tess_rings_bo)
1772                         goto fail;
1773         } else {
1774                 tess_rings_bo = queue->tess_rings_bo;
1775         }
1776
1777         if (scratch_bo != queue->scratch_bo ||
1778             esgs_ring_bo != queue->esgs_ring_bo ||
1779             gsvs_ring_bo != queue->gsvs_ring_bo ||
1780             tess_rings_bo != queue->tess_rings_bo ||
1781             add_sample_positions) {
1782                 uint32_t size = 0;
1783                 if (gsvs_ring_bo || esgs_ring_bo ||
1784                     tess_rings_bo || add_sample_positions) {
1785                         size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1786                         if (add_sample_positions)
1787                                 size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
1788                 }
1789                 else if (scratch_bo)
1790                         size = 8; /* 2 dword */
1791
1792                 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1793                                                                  size,
1794                                                                  4096,
1795                                                                  RADEON_DOMAIN_VRAM,
1796                                                                  RADEON_FLAG_CPU_ACCESS |
1797                                                                  RADEON_FLAG_NO_INTERPROCESS_SHARING |
1798                                                                  RADEON_FLAG_READ_ONLY);
1799                 if (!descriptor_bo)
1800                         goto fail;
1801         } else
1802                 descriptor_bo = queue->descriptor_bo;
1803
1804         for(int i = 0; i < 3; ++i) {
1805                 struct radeon_winsys_cs *cs = NULL;
1806                 cs = queue->device->ws->cs_create(queue->device->ws,
1807                                                   queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1808                 if (!cs)
1809                         goto fail;
1810
1811                 dest_cs[i] = cs;
1812
1813                 if (scratch_bo)
1814                         radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
1815
1816                 if (esgs_ring_bo)
1817                         radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
1818
1819                 if (gsvs_ring_bo)
1820                         radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
1821
1822                 if (tess_rings_bo)
1823                         radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo, 8);
1824
1825                 if (descriptor_bo)
1826                         radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
1827
1828                 if (descriptor_bo != queue->descriptor_bo) {
1829                         uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1830
1831                         if (scratch_bo) {
1832                                 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1833                                 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1834                                                  S_008F04_SWIZZLE_ENABLE(1);
1835                                 map[0] = scratch_va;
1836                                 map[1] = rsrc1;
1837                         }
1838
1839                         if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo ||
1840                             add_sample_positions)
1841                                 fill_geom_tess_rings(queue, map, add_sample_positions,
1842                                                      esgs_ring_size, esgs_ring_bo,
1843                                                      gsvs_ring_size, gsvs_ring_bo,
1844                                                      tess_factor_ring_size,
1845                                                      tess_offchip_ring_offset,
1846                                                      tess_offchip_ring_size,
1847                                                      tess_rings_bo);
1848
1849                         queue->device->ws->buffer_unmap(descriptor_bo);
1850                 }
1851
1852                 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo)  {
1853                         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1854                         radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1855                         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1856                         radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1857                 }
1858
1859                 if (esgs_ring_bo || gsvs_ring_bo) {
1860                         if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1861                                 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1862                                 radeon_emit(cs, esgs_ring_size >> 8);
1863                                 radeon_emit(cs, gsvs_ring_size >> 8);
1864                         } else {
1865                                 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1866                                 radeon_emit(cs, esgs_ring_size >> 8);
1867                                 radeon_emit(cs, gsvs_ring_size >> 8);
1868                         }
1869                 }
1870
1871                 if (tess_rings_bo) {
1872                         uint64_t tf_va = radv_buffer_get_va(tess_rings_bo);
1873                         if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1874                                 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1875                                                        S_030938_SIZE(tess_factor_ring_size / 4));
1876                                 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1877                                                        tf_va >> 8);
1878                                 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1879                                         radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1880                                                                tf_va >> 40);
1881                                 }
1882                                 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1883                         } else {
1884                                 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1885                                                       S_008988_SIZE(tess_factor_ring_size / 4));
1886                                 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1887                                                       tf_va >> 8);
1888                                 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1889                                                       hs_offchip_param);
1890                         }
1891                 }
1892
1893                 if (descriptor_bo) {
1894                         uint64_t va = radv_buffer_get_va(descriptor_bo);
1895                         if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1896                                 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1897                                                 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1898                                                 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
1899                                                 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
1900
1901                                 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1902                                         radeon_set_sh_reg_seq(cs, regs[i], 2);
1903                                         radeon_emit(cs, va);
1904                                         radeon_emit(cs, va >> 32);
1905                                 }
1906                         } else {
1907                                 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1908                                                 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1909                                                 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1910                                                 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1911                                                 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1912                                                 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1913
1914                                 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1915                                         radeon_set_sh_reg_seq(cs, regs[i], 2);
1916                                         radeon_emit(cs, va);
1917                                         radeon_emit(cs, va >> 32);
1918                                 }
1919                         }
1920                 }
1921
1922                 if (compute_scratch_bo) {
1923                         uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1924                         uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1925                                          S_008F04_SWIZZLE_ENABLE(1);
1926
1927                         radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
1928
1929                         radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1930                         radeon_emit(cs, scratch_va);
1931                         radeon_emit(cs, rsrc1);
1932                 }
1933
1934                 if (i == 0) {
1935                         si_cs_emit_cache_flush(cs,
1936                                                queue->device->physical_device->rad_info.chip_class,
1937                                                NULL, 0,
1938                                                queue->queue_family_index == RING_COMPUTE &&
1939                                                  queue->device->physical_device->rad_info.chip_class >= CIK,
1940                                                (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1941                                                RADV_CMD_FLAG_INV_ICACHE |
1942                                                RADV_CMD_FLAG_INV_SMEM_L1 |
1943                                                RADV_CMD_FLAG_INV_VMEM_L1 |
1944                                                RADV_CMD_FLAG_INV_GLOBAL_L2);
1945                 } else if (i == 1) {
1946                         si_cs_emit_cache_flush(cs,
1947                                                queue->device->physical_device->rad_info.chip_class,
1948                                                NULL, 0,
1949                                                queue->queue_family_index == RING_COMPUTE &&
1950                                                  queue->device->physical_device->rad_info.chip_class >= CIK,
1951                                                RADV_CMD_FLAG_INV_ICACHE |
1952                                                RADV_CMD_FLAG_INV_SMEM_L1 |
1953                                                RADV_CMD_FLAG_INV_VMEM_L1 |
1954                                                RADV_CMD_FLAG_INV_GLOBAL_L2);
1955                 }
1956
1957                 if (!queue->device->ws->cs_finalize(cs))
1958                         goto fail;
1959         }
1960
1961         if (queue->initial_full_flush_preamble_cs)
1962                         queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1963
1964         if (queue->initial_preamble_cs)
1965                         queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1966
1967         if (queue->continue_preamble_cs)
1968                         queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1969
1970         queue->initial_full_flush_preamble_cs = dest_cs[0];
1971         queue->initial_preamble_cs = dest_cs[1];
1972         queue->continue_preamble_cs = dest_cs[2];
1973
1974         if (scratch_bo != queue->scratch_bo) {
1975                 if (queue->scratch_bo)
1976                         queue->device->ws->buffer_destroy(queue->scratch_bo);
1977                 queue->scratch_bo = scratch_bo;
1978                 queue->scratch_size = scratch_size;
1979         }
1980
1981         if (compute_scratch_bo != queue->compute_scratch_bo) {
1982                 if (queue->compute_scratch_bo)
1983                         queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1984                 queue->compute_scratch_bo = compute_scratch_bo;
1985                 queue->compute_scratch_size = compute_scratch_size;
1986         }
1987
1988         if (esgs_ring_bo != queue->esgs_ring_bo) {
1989                 if (queue->esgs_ring_bo)
1990                         queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1991                 queue->esgs_ring_bo = esgs_ring_bo;
1992                 queue->esgs_ring_size = esgs_ring_size;
1993         }
1994
1995         if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1996                 if (queue->gsvs_ring_bo)
1997                         queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1998                 queue->gsvs_ring_bo = gsvs_ring_bo;
1999                 queue->gsvs_ring_size = gsvs_ring_size;
2000         }
2001
2002         if (tess_rings_bo != queue->tess_rings_bo) {
2003                 queue->tess_rings_bo = tess_rings_bo;
2004                 queue->has_tess_rings = true;
2005         }
2006
2007         if (descriptor_bo != queue->descriptor_bo) {
2008                 if (queue->descriptor_bo)
2009                         queue->device->ws->buffer_destroy(queue->descriptor_bo);
2010
2011                 queue->descriptor_bo = descriptor_bo;
2012         }
2013
2014         if (add_sample_positions)
2015                 queue->has_sample_positions = true;
2016
2017         *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2018         *initial_preamble_cs = queue->initial_preamble_cs;
2019         *continue_preamble_cs = queue->continue_preamble_cs;
2020         if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2021                         *continue_preamble_cs = NULL;
2022         return VK_SUCCESS;
2023 fail:
2024         for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2025                 if (dest_cs[i])
2026                         queue->device->ws->cs_destroy(dest_cs[i]);
2027         if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2028                 queue->device->ws->buffer_destroy(descriptor_bo);
2029         if (scratch_bo && scratch_bo != queue->scratch_bo)
2030                 queue->device->ws->buffer_destroy(scratch_bo);
2031         if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2032                 queue->device->ws->buffer_destroy(compute_scratch_bo);
2033         if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2034                 queue->device->ws->buffer_destroy(esgs_ring_bo);
2035         if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2036                 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2037         if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2038                 queue->device->ws->buffer_destroy(tess_rings_bo);
2039         return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2040 }
2041
2042 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
2043                                       int num_sems,
2044                                       const VkSemaphore *sems,
2045                                       VkFence _fence,
2046                                       bool reset_temp)
2047 {
2048         int syncobj_idx = 0, sem_idx = 0;
2049
2050         if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2051                 return VK_SUCCESS;
2052
2053         for (uint32_t i = 0; i < num_sems; i++) {
2054                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2055
2056                 if (sem->temp_syncobj || sem->syncobj)
2057                         counts->syncobj_count++;
2058                 else
2059                         counts->sem_count++;
2060         }
2061
2062         if (_fence != VK_NULL_HANDLE) {
2063                 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2064                 if (fence->temp_syncobj || fence->syncobj)
2065                         counts->syncobj_count++;
2066         }
2067
2068         if (counts->syncobj_count) {
2069                 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2070                 if (!counts->syncobj)
2071                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2072         }
2073
2074         if (counts->sem_count) {
2075                 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2076                 if (!counts->sem) {
2077                         free(counts->syncobj);
2078                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2079                 }
2080         }
2081
2082         for (uint32_t i = 0; i < num_sems; i++) {
2083                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2084
2085                 if (sem->temp_syncobj) {
2086                         counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2087                 }
2088                 else if (sem->syncobj)
2089                         counts->syncobj[syncobj_idx++] = sem->syncobj;
2090                 else {
2091                         assert(sem->sem);
2092                         counts->sem[sem_idx++] = sem->sem;
2093                 }
2094         }
2095
2096         if (_fence != VK_NULL_HANDLE) {
2097                 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2098                 if (fence->temp_syncobj)
2099                         counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2100                 else if (fence->syncobj)
2101                         counts->syncobj[syncobj_idx++] = fence->syncobj;
2102         }
2103
2104         return VK_SUCCESS;
2105 }
2106
2107 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2108 {
2109         free(sem_info->wait.syncobj);
2110         free(sem_info->wait.sem);
2111         free(sem_info->signal.syncobj);
2112         free(sem_info->signal.sem);
2113 }
2114
2115
2116 static void radv_free_temp_syncobjs(struct radv_device *device,
2117                                     int num_sems,
2118                                     const VkSemaphore *sems)
2119 {
2120         for (uint32_t i = 0; i < num_sems; i++) {
2121                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2122
2123                 if (sem->temp_syncobj) {
2124                         device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2125                         sem->temp_syncobj = 0;
2126                 }
2127         }
2128 }
2129
2130 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
2131                              int num_wait_sems,
2132                              const VkSemaphore *wait_sems,
2133                              int num_signal_sems,
2134                              const VkSemaphore *signal_sems,
2135                              VkFence fence)
2136 {
2137         VkResult ret;
2138         memset(sem_info, 0, sizeof(*sem_info));
2139
2140         ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
2141         if (ret)
2142                 return ret;
2143         ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, fence, false);
2144         if (ret)
2145                 radv_free_sem_info(sem_info);
2146
2147         /* caller can override these */
2148         sem_info->cs_emit_wait = true;
2149         sem_info->cs_emit_signal = true;
2150         return ret;
2151 }
2152
2153 /* Signals fence as soon as all the work currently put on queue is done. */
2154 static VkResult radv_signal_fence(struct radv_queue *queue,
2155                               struct radv_fence *fence)
2156 {
2157         int ret;
2158         VkResult result;
2159         struct radv_winsys_sem_info sem_info;
2160
2161         result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
2162                                      radv_fence_to_handle(fence));
2163         if (result != VK_SUCCESS)
2164                 return result;
2165
2166         ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2167                                            &queue->device->empty_cs[queue->queue_family_index],
2168                                            1, NULL, NULL, &sem_info,
2169                                            false, fence->fence);
2170         radv_free_sem_info(&sem_info);
2171
2172         /* TODO: find a better error */
2173         if (ret)
2174                 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2175
2176         return VK_SUCCESS;
2177 }
2178
2179 VkResult radv_QueueSubmit(
2180         VkQueue                                     _queue,
2181         uint32_t                                    submitCount,
2182         const VkSubmitInfo*                         pSubmits,
2183         VkFence                                     _fence)
2184 {
2185         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2186         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2187         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2188         struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2189         int ret;
2190         uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
2191         uint32_t scratch_size = 0;
2192         uint32_t compute_scratch_size = 0;
2193         uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2194         struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2195         VkResult result;
2196         bool fence_emitted = false;
2197         bool tess_rings_needed = false;
2198         bool sample_positions_needed = false;
2199
2200         /* Do this first so failing to allocate scratch buffers can't result in
2201          * partially executed submissions. */
2202         for (uint32_t i = 0; i < submitCount; i++) {
2203                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2204                         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2205                                          pSubmits[i].pCommandBuffers[j]);
2206
2207                         scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2208                         compute_scratch_size = MAX2(compute_scratch_size,
2209                                                     cmd_buffer->compute_scratch_size_needed);
2210                         esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2211                         gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2212                         tess_rings_needed |= cmd_buffer->tess_rings_needed;
2213                         sample_positions_needed |= cmd_buffer->sample_positions_needed;
2214                 }
2215         }
2216
2217         result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2218                                       esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2219                                       sample_positions_needed, &initial_flush_preamble_cs,
2220                                       &initial_preamble_cs, &continue_preamble_cs);
2221         if (result != VK_SUCCESS)
2222                 return result;
2223
2224         for (uint32_t i = 0; i < submitCount; i++) {
2225                 struct radeon_winsys_cs **cs_array;
2226                 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2227                 bool can_patch = true;
2228                 uint32_t advance;
2229                 struct radv_winsys_sem_info sem_info;
2230
2231                 result = radv_alloc_sem_info(&sem_info,
2232                                              pSubmits[i].waitSemaphoreCount,
2233                                              pSubmits[i].pWaitSemaphores,
2234                                              pSubmits[i].signalSemaphoreCount,
2235                                              pSubmits[i].pSignalSemaphores,
2236                                              _fence);
2237                 if (result != VK_SUCCESS)
2238                         return result;
2239
2240                 if (!pSubmits[i].commandBufferCount) {
2241                         if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2242                                 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2243                                                                    &queue->device->empty_cs[queue->queue_family_index],
2244                                                                    1, NULL, NULL,
2245                                                                    &sem_info,
2246                                                                    false, base_fence);
2247                                 if (ret) {
2248                                         radv_loge("failed to submit CS %d\n", i);
2249                                         abort();
2250                                 }
2251                                 fence_emitted = true;
2252                         }
2253                         radv_free_sem_info(&sem_info);
2254                         continue;
2255                 }
2256
2257                 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
2258                                                 (pSubmits[i].commandBufferCount));
2259
2260                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2261                         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2262                                          pSubmits[i].pCommandBuffers[j]);
2263                         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2264
2265                         cs_array[j] = cmd_buffer->cs;
2266                         if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2267                                 can_patch = false;
2268
2269                         cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2270                 }
2271
2272                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2273                         struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2274                         advance = MIN2(max_cs_submission,
2275                                        pSubmits[i].commandBufferCount - j);
2276
2277                         if (queue->device->trace_bo)
2278                                 *queue->device->trace_id_ptr = 0;
2279
2280                         sem_info.cs_emit_wait = j == 0;
2281                         sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2282
2283                         ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2284                                                         advance, initial_preamble, continue_preamble_cs,
2285                                                            &sem_info,
2286                                                         can_patch, base_fence);
2287
2288                         if (ret) {
2289                                 radv_loge("failed to submit CS %d\n", i);
2290                                 abort();
2291                         }
2292                         fence_emitted = true;
2293                         if (queue->device->trace_bo) {
2294                                 radv_check_gpu_hangs(queue, cs_array[j]);
2295                         }
2296                 }
2297
2298                 radv_free_temp_syncobjs(queue->device,
2299                                         pSubmits[i].waitSemaphoreCount,
2300                                         pSubmits[i].pWaitSemaphores);
2301                 radv_free_sem_info(&sem_info);
2302                 free(cs_array);
2303         }
2304
2305         if (fence) {
2306                 if (!fence_emitted) {
2307                         radv_signal_fence(queue, fence);
2308                 }
2309                 fence->submitted = true;
2310         }
2311
2312         return VK_SUCCESS;
2313 }
2314
2315 VkResult radv_QueueWaitIdle(
2316         VkQueue                                     _queue)
2317 {
2318         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2319
2320         queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2321                                          radv_queue_family_to_ring(queue->queue_family_index),
2322                                          queue->queue_idx);
2323         return VK_SUCCESS;
2324 }
2325
2326 VkResult radv_DeviceWaitIdle(
2327         VkDevice                                    _device)
2328 {
2329         RADV_FROM_HANDLE(radv_device, device, _device);
2330
2331         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2332                 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2333                         radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2334                 }
2335         }
2336         return VK_SUCCESS;
2337 }
2338
2339 VkResult radv_EnumerateInstanceExtensionProperties(
2340     const char*                                 pLayerName,
2341     uint32_t*                                   pPropertyCount,
2342     VkExtensionProperties*                      pProperties)
2343 {
2344         VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2345
2346         for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
2347                 if (radv_supported_instance_extensions.extensions[i]) {
2348                         vk_outarray_append(&out, prop) {
2349                                 *prop = radv_instance_extensions[i];
2350                         }
2351                 }
2352         }
2353
2354         return vk_outarray_status(&out);
2355 }
2356
2357 VkResult radv_EnumerateDeviceExtensionProperties(
2358     VkPhysicalDevice                            physicalDevice,
2359     const char*                                 pLayerName,
2360     uint32_t*                                   pPropertyCount,
2361     VkExtensionProperties*                      pProperties)
2362 {
2363         RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2364         VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2365
2366         for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
2367                 if (device->supported_extensions.extensions[i]) {
2368                         vk_outarray_append(&out, prop) {
2369                                 *prop = radv_device_extensions[i];
2370                         }
2371                 }
2372         }
2373
2374         return vk_outarray_status(&out);
2375 }
2376
2377 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2378         VkInstance                                  _instance,
2379         const char*                                 pName)
2380 {
2381         RADV_FROM_HANDLE(radv_instance, instance, _instance);
2382
2383         return radv_lookup_entrypoint_checked(pName,
2384                                               instance ? instance->apiVersion : 0,
2385                                               instance ? &instance->enabled_extensions : NULL,
2386                                               NULL);
2387 }
2388
2389 /* The loader wants us to expose a second GetInstanceProcAddr function
2390  * to work around certain LD_PRELOAD issues seen in apps.
2391  */
2392 PUBLIC
2393 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2394         VkInstance                                  instance,
2395         const char*                                 pName);
2396
2397 PUBLIC
2398 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2399         VkInstance                                  instance,
2400         const char*                                 pName)
2401 {
2402         return radv_GetInstanceProcAddr(instance, pName);
2403 }
2404
2405 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2406         VkDevice                                    _device,
2407         const char*                                 pName)
2408 {
2409         RADV_FROM_HANDLE(radv_device, device, _device);
2410
2411         return radv_lookup_entrypoint_checked(pName,
2412                                               device->instance->apiVersion,
2413                                               &device->instance->enabled_extensions,
2414                                               &device->enabled_extensions);
2415 }
2416
2417 bool radv_get_memory_fd(struct radv_device *device,
2418                         struct radv_device_memory *memory,
2419                         int *pFD)
2420 {
2421         struct radeon_bo_metadata metadata;
2422
2423         if (memory->image) {
2424                 radv_init_metadata(device, memory->image, &metadata);
2425                 device->ws->buffer_set_metadata(memory->bo, &metadata);
2426         }
2427
2428         return device->ws->buffer_get_fd(device->ws, memory->bo,
2429                                          pFD);
2430 }
2431
2432 static VkResult radv_alloc_memory(struct radv_device *device,
2433                                   const VkMemoryAllocateInfo*     pAllocateInfo,
2434                                   const VkAllocationCallbacks*    pAllocator,
2435                                   VkDeviceMemory*                 pMem)
2436 {
2437         struct radv_device_memory *mem;
2438         VkResult result;
2439         enum radeon_bo_domain domain;
2440         uint32_t flags = 0;
2441         enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2442
2443         assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2444
2445         if (pAllocateInfo->allocationSize == 0) {
2446                 /* Apparently, this is allowed */
2447                 *pMem = VK_NULL_HANDLE;
2448                 return VK_SUCCESS;
2449         }
2450
2451         const VkImportMemoryFdInfoKHR *import_info =
2452                 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2453         const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2454                 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2455         const VkExportMemoryAllocateInfoKHR *export_info =
2456                 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
2457         const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
2458                 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
2459
2460         const struct wsi_memory_allocate_info *wsi_info =
2461                 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2462
2463         mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2464                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2465         if (mem == NULL)
2466                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2467
2468         if (wsi_info && wsi_info->implicit_sync)
2469                 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2470
2471         if (dedicate_info) {
2472                 mem->image = radv_image_from_handle(dedicate_info->image);
2473                 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2474         } else {
2475                 mem->image = NULL;
2476                 mem->buffer = NULL;
2477         }
2478
2479         mem->user_ptr = NULL;
2480
2481         if (import_info) {
2482                 assert(import_info->handleType ==
2483                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2484                        import_info->handleType ==
2485                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2486                 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2487                                                      NULL, NULL);
2488                 if (!mem->bo) {
2489                         result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2490                         goto fail;
2491                 } else {
2492                         close(import_info->fd);
2493                         goto out_success;
2494                 }
2495         }
2496
2497         if (host_ptr_info) {
2498                 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
2499                 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
2500                 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
2501                                                       pAllocateInfo->allocationSize);
2502                 if (!mem->bo) {
2503                         result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2504                         goto fail;
2505                 } else {
2506                         mem->user_ptr = host_ptr_info->pHostPointer;
2507                         goto out_success;
2508                 }
2509         }
2510
2511         uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2512         if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2513             mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2514                 domain = RADEON_DOMAIN_GTT;
2515         else
2516                 domain = RADEON_DOMAIN_VRAM;
2517
2518         if (mem_type_index == RADV_MEM_TYPE_VRAM)
2519                 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2520         else
2521                 flags |= RADEON_FLAG_CPU_ACCESS;
2522
2523         if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2524                 flags |= RADEON_FLAG_GTT_WC;
2525
2526         if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
2527                 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2528
2529         mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2530                                                domain, flags);
2531
2532         if (!mem->bo) {
2533                 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2534                 goto fail;
2535         }
2536         mem->type_index = mem_type_index;
2537 out_success:
2538         *pMem = radv_device_memory_to_handle(mem);
2539
2540         return VK_SUCCESS;
2541
2542 fail:
2543         vk_free2(&device->alloc, pAllocator, mem);
2544
2545         return result;
2546 }
2547
2548 VkResult radv_AllocateMemory(
2549         VkDevice                                    _device,
2550         const VkMemoryAllocateInfo*                 pAllocateInfo,
2551         const VkAllocationCallbacks*                pAllocator,
2552         VkDeviceMemory*                             pMem)
2553 {
2554         RADV_FROM_HANDLE(radv_device, device, _device);
2555         return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
2556 }
2557
2558 void radv_FreeMemory(
2559         VkDevice                                    _device,
2560         VkDeviceMemory                              _mem,
2561         const VkAllocationCallbacks*                pAllocator)
2562 {
2563         RADV_FROM_HANDLE(radv_device, device, _device);
2564         RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2565
2566         if (mem == NULL)
2567                 return;
2568
2569         device->ws->buffer_destroy(mem->bo);
2570         mem->bo = NULL;
2571
2572         vk_free2(&device->alloc, pAllocator, mem);
2573 }
2574
2575 VkResult radv_MapMemory(
2576         VkDevice                                    _device,
2577         VkDeviceMemory                              _memory,
2578         VkDeviceSize                                offset,
2579         VkDeviceSize                                size,
2580         VkMemoryMapFlags                            flags,
2581         void**                                      ppData)
2582 {
2583         RADV_FROM_HANDLE(radv_device, device, _device);
2584         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2585
2586         if (mem == NULL) {
2587                 *ppData = NULL;
2588                 return VK_SUCCESS;
2589         }
2590
2591         if (mem->user_ptr)
2592                 *ppData = mem->user_ptr;
2593         else
2594                 *ppData = device->ws->buffer_map(mem->bo);
2595
2596         if (*ppData) {
2597                 *ppData += offset;
2598                 return VK_SUCCESS;
2599         }
2600
2601         return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
2602 }
2603
2604 void radv_UnmapMemory(
2605         VkDevice                                    _device,
2606         VkDeviceMemory                              _memory)
2607 {
2608         RADV_FROM_HANDLE(radv_device, device, _device);
2609         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2610
2611         if (mem == NULL)
2612                 return;
2613
2614         if (mem->user_ptr == NULL)
2615                 device->ws->buffer_unmap(mem->bo);
2616 }
2617
2618 VkResult radv_FlushMappedMemoryRanges(
2619         VkDevice                                    _device,
2620         uint32_t                                    memoryRangeCount,
2621         const VkMappedMemoryRange*                  pMemoryRanges)
2622 {
2623         return VK_SUCCESS;
2624 }
2625
2626 VkResult radv_InvalidateMappedMemoryRanges(
2627         VkDevice                                    _device,
2628         uint32_t                                    memoryRangeCount,
2629         const VkMappedMemoryRange*                  pMemoryRanges)
2630 {
2631         return VK_SUCCESS;
2632 }
2633
2634 void radv_GetBufferMemoryRequirements(
2635         VkDevice                                    _device,
2636         VkBuffer                                    _buffer,
2637         VkMemoryRequirements*                       pMemoryRequirements)
2638 {
2639         RADV_FROM_HANDLE(radv_device, device, _device);
2640         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2641
2642         pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2643
2644         if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2645                 pMemoryRequirements->alignment = 4096;
2646         else
2647                 pMemoryRequirements->alignment = 16;
2648
2649         pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2650 }
2651
2652 void radv_GetBufferMemoryRequirements2(
2653         VkDevice                                     device,
2654         const VkBufferMemoryRequirementsInfo2KHR*    pInfo,
2655         VkMemoryRequirements2KHR*                    pMemoryRequirements)
2656 {
2657         radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2658                                         &pMemoryRequirements->memoryRequirements);
2659         RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2660         vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2661                 switch (ext->sType) {
2662                 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2663                         VkMemoryDedicatedRequirementsKHR *req =
2664                                        (VkMemoryDedicatedRequirementsKHR *) ext;
2665                         req->requiresDedicatedAllocation = buffer->shareable;
2666                         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2667                         break;
2668                 }
2669                 default:
2670                         break;
2671                 }
2672         }
2673 }
2674
2675 void radv_GetImageMemoryRequirements(
2676         VkDevice                                    _device,
2677         VkImage                                     _image,
2678         VkMemoryRequirements*                       pMemoryRequirements)
2679 {
2680         RADV_FROM_HANDLE(radv_device, device, _device);
2681         RADV_FROM_HANDLE(radv_image, image, _image);
2682
2683         pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2684
2685         pMemoryRequirements->size = image->size;
2686         pMemoryRequirements->alignment = image->alignment;
2687 }
2688
2689 void radv_GetImageMemoryRequirements2(
2690         VkDevice                                    device,
2691         const VkImageMemoryRequirementsInfo2KHR*    pInfo,
2692         VkMemoryRequirements2KHR*                   pMemoryRequirements)
2693 {
2694         radv_GetImageMemoryRequirements(device, pInfo->image,
2695                                         &pMemoryRequirements->memoryRequirements);
2696
2697         RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2698
2699         vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2700                 switch (ext->sType) {
2701                 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2702                         VkMemoryDedicatedRequirementsKHR *req =
2703                                        (VkMemoryDedicatedRequirementsKHR *) ext;
2704                         req->requiresDedicatedAllocation = image->shareable;
2705                         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2706                         break;
2707                 }
2708                 default:
2709                         break;
2710                 }
2711         }
2712 }
2713
2714 void radv_GetImageSparseMemoryRequirements(
2715         VkDevice                                    device,
2716         VkImage                                     image,
2717         uint32_t*                                   pSparseMemoryRequirementCount,
2718         VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
2719 {
2720         stub();
2721 }
2722
2723 void radv_GetImageSparseMemoryRequirements2(
2724         VkDevice                                    device,
2725         const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2726         uint32_t*                                   pSparseMemoryRequirementCount,
2727         VkSparseImageMemoryRequirements2KHR*            pSparseMemoryRequirements)
2728 {
2729         stub();
2730 }
2731
2732 void radv_GetDeviceMemoryCommitment(
2733         VkDevice                                    device,
2734         VkDeviceMemory                              memory,
2735         VkDeviceSize*                               pCommittedMemoryInBytes)
2736 {
2737         *pCommittedMemoryInBytes = 0;
2738 }
2739
2740 VkResult radv_BindBufferMemory2(VkDevice device,
2741                                 uint32_t bindInfoCount,
2742                                 const VkBindBufferMemoryInfoKHR *pBindInfos)
2743 {
2744         for (uint32_t i = 0; i < bindInfoCount; ++i) {
2745                 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2746                 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2747
2748                 if (mem) {
2749                         buffer->bo = mem->bo;
2750                         buffer->offset = pBindInfos[i].memoryOffset;
2751                 } else {
2752                         buffer->bo = NULL;
2753                 }
2754         }
2755         return VK_SUCCESS;
2756 }
2757
2758 VkResult radv_BindBufferMemory(
2759         VkDevice                                    device,
2760         VkBuffer                                    buffer,
2761         VkDeviceMemory                              memory,
2762         VkDeviceSize                                memoryOffset)
2763 {
2764         const VkBindBufferMemoryInfoKHR info = {
2765                 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2766                 .buffer = buffer,
2767                 .memory = memory,
2768                 .memoryOffset = memoryOffset
2769         };
2770
2771         return radv_BindBufferMemory2(device, 1, &info);
2772 }
2773
2774 VkResult radv_BindImageMemory2(VkDevice device,
2775                                uint32_t bindInfoCount,
2776                                const VkBindImageMemoryInfoKHR *pBindInfos)
2777 {
2778         for (uint32_t i = 0; i < bindInfoCount; ++i) {
2779                 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2780                 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2781
2782                 if (mem) {
2783                         image->bo = mem->bo;
2784                         image->offset = pBindInfos[i].memoryOffset;
2785                 } else {
2786                         image->bo = NULL;
2787                         image->offset = 0;
2788                 }
2789         }
2790         return VK_SUCCESS;
2791 }
2792
2793
2794 VkResult radv_BindImageMemory(
2795         VkDevice                                    device,
2796         VkImage                                     image,
2797         VkDeviceMemory                              memory,
2798         VkDeviceSize                                memoryOffset)
2799 {
2800         const VkBindImageMemoryInfoKHR info = {
2801                 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2802                 .image = image,
2803                 .memory = memory,
2804                 .memoryOffset = memoryOffset
2805         };
2806
2807         return radv_BindImageMemory2(device, 1, &info);
2808 }
2809
2810
2811 static void
2812 radv_sparse_buffer_bind_memory(struct radv_device *device,
2813                                const VkSparseBufferMemoryBindInfo *bind)
2814 {
2815         RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2816
2817         for (uint32_t i = 0; i < bind->bindCount; ++i) {
2818                 struct radv_device_memory *mem = NULL;
2819
2820                 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2821                         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2822
2823                 device->ws->buffer_virtual_bind(buffer->bo,
2824                                                 bind->pBinds[i].resourceOffset,
2825                                                 bind->pBinds[i].size,
2826                                                 mem ? mem->bo : NULL,
2827                                                 bind->pBinds[i].memoryOffset);
2828         }
2829 }
2830
2831 static void
2832 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2833                                      const VkSparseImageOpaqueMemoryBindInfo *bind)
2834 {
2835         RADV_FROM_HANDLE(radv_image, image, bind->image);
2836
2837         for (uint32_t i = 0; i < bind->bindCount; ++i) {
2838                 struct radv_device_memory *mem = NULL;
2839
2840                 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2841                         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2842
2843                 device->ws->buffer_virtual_bind(image->bo,
2844                                                 bind->pBinds[i].resourceOffset,
2845                                                 bind->pBinds[i].size,
2846                                                 mem ? mem->bo : NULL,
2847                                                 bind->pBinds[i].memoryOffset);
2848         }
2849 }
2850
2851  VkResult radv_QueueBindSparse(
2852         VkQueue                                     _queue,
2853         uint32_t                                    bindInfoCount,
2854         const VkBindSparseInfo*                     pBindInfo,
2855         VkFence                                     _fence)
2856 {
2857         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2858         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2859         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2860         bool fence_emitted = false;
2861
2862         for (uint32_t i = 0; i < bindInfoCount; ++i) {
2863                 struct radv_winsys_sem_info sem_info;
2864                 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2865                         radv_sparse_buffer_bind_memory(queue->device,
2866                                                        pBindInfo[i].pBufferBinds + j);
2867                 }
2868
2869                 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2870                         radv_sparse_image_opaque_bind_memory(queue->device,
2871                                                              pBindInfo[i].pImageOpaqueBinds + j);
2872                 }
2873
2874                 VkResult result;
2875                 result = radv_alloc_sem_info(&sem_info,
2876                                              pBindInfo[i].waitSemaphoreCount,
2877                                              pBindInfo[i].pWaitSemaphores,
2878                                              pBindInfo[i].signalSemaphoreCount,
2879                                              pBindInfo[i].pSignalSemaphores,
2880                                              _fence);
2881                 if (result != VK_SUCCESS)
2882                         return result;
2883
2884                 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2885                         queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2886                                                      &queue->device->empty_cs[queue->queue_family_index],
2887                                                      1, NULL, NULL,
2888                                                      &sem_info,
2889                                                      false, base_fence);
2890                         fence_emitted = true;
2891                         if (fence)
2892                                 fence->submitted = true;
2893                 }
2894
2895                 radv_free_sem_info(&sem_info);
2896
2897         }
2898
2899         if (fence) {
2900                 if (!fence_emitted) {
2901                         radv_signal_fence(queue, fence);
2902                 }
2903                 fence->submitted = true;
2904         }
2905
2906         return VK_SUCCESS;
2907 }
2908
2909 VkResult radv_CreateFence(
2910         VkDevice                                    _device,
2911         const VkFenceCreateInfo*                    pCreateInfo,
2912         const VkAllocationCallbacks*                pAllocator,
2913         VkFence*                                    pFence)
2914 {
2915         RADV_FROM_HANDLE(radv_device, device, _device);
2916         const VkExportFenceCreateInfoKHR *export =
2917                 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
2918         VkExternalFenceHandleTypeFlagsKHR handleTypes =
2919                 export ? export->handleTypes : 0;
2920
2921         struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2922                                                sizeof(*fence), 8,
2923                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2924
2925         if (!fence)
2926                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2927
2928         fence->submitted = false;
2929         fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2930         fence->temp_syncobj = 0;
2931         if (device->always_use_syncobj || handleTypes) {
2932                 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
2933                 if (ret) {
2934                         vk_free2(&device->alloc, pAllocator, fence);
2935                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2936                 }
2937                 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
2938                         device->ws->signal_syncobj(device->ws, fence->syncobj);
2939                 }
2940                 fence->fence = NULL;
2941         } else {
2942                 fence->fence = device->ws->create_fence();
2943                 if (!fence->fence) {
2944                         vk_free2(&device->alloc, pAllocator, fence);
2945                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2946                 }
2947                 fence->syncobj = 0;
2948         }
2949
2950         *pFence = radv_fence_to_handle(fence);
2951
2952         return VK_SUCCESS;
2953 }
2954
2955 void radv_DestroyFence(
2956         VkDevice                                    _device,
2957         VkFence                                     _fence,
2958         const VkAllocationCallbacks*                pAllocator)
2959 {
2960         RADV_FROM_HANDLE(radv_device, device, _device);
2961         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2962
2963         if (!fence)
2964                 return;
2965
2966         if (fence->temp_syncobj)
2967                 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
2968         if (fence->syncobj)
2969                 device->ws->destroy_syncobj(device->ws, fence->syncobj);
2970         if (fence->fence)
2971                 device->ws->destroy_fence(fence->fence);
2972         vk_free2(&device->alloc, pAllocator, fence);
2973 }
2974
2975
/**
 * Read CLOCK_MONOTONIC and return it as a single nanosecond count.
 */
static uint64_t radv_get_current_time(void)
{
	struct timespec tv;
	clock_gettime(CLOCK_MONOTONIC, &tv);
	return tv.tv_nsec + tv.tv_sec*1000000000ull;
}
2982
/**
 * Convert a relative timeout in nanoseconds into an absolute deadline on
 * the clock used by radv_get_current_time, saturating at UINT64_MAX
 * instead of wrapping.
 */
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	const uint64_t now = radv_get_current_time();
	/* Largest value that can be added to `now` without overflow. */
	const uint64_t headroom = UINT64_MAX - now;

	if (headroom < timeout)
		timeout = headroom;

	return now + timeout;
}
2991
2992
2993 static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
2994 {
2995         for (uint32_t i = 0; i < fenceCount; ++i) {
2996                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2997                 if (fence->syncobj || fence->temp_syncobj || (!fence->signalled && !fence->submitted))
2998                         return false;
2999         }
3000         return true;
3001 }
3002
3003 VkResult radv_WaitForFences(
3004         VkDevice                                    _device,
3005         uint32_t                                    fenceCount,
3006         const VkFence*                              pFences,
3007         VkBool32                                    waitAll,
3008         uint64_t                                    timeout)
3009 {
3010         RADV_FROM_HANDLE(radv_device, device, _device);
3011         timeout = radv_get_absolute_timeout(timeout);
3012
3013         if (device->always_use_syncobj) {
3014                 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3015                 if (!handles)
3016                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3017
3018                 for (uint32_t i = 0; i < fenceCount; ++i) {
3019                         RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3020                         handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3021                 }
3022
3023                 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3024
3025                 free(handles);
3026                 return success ? VK_SUCCESS : VK_TIMEOUT;
3027         }
3028
3029         if (!waitAll && fenceCount > 1) {
3030                 /* Not doing this by default for waitAll, due to needing to allocate twice. */
3031                 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
3032                         uint32_t wait_count = 0;
3033                         struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
3034                         if (!fences)
3035                                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3036
3037                         for (uint32_t i = 0; i < fenceCount; ++i) {
3038                                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3039
3040                                 if (fence->signalled) {
3041                                         free(fences);
3042                                         return VK_SUCCESS;
3043                                 }
3044
3045                                 fences[wait_count++] = fence->fence;
3046                         }
3047
3048                         bool success = device->ws->fences_wait(device->ws, fences, wait_count,
3049                                                                waitAll, timeout - radv_get_current_time());
3050
3051                         free(fences);
3052                         return success ? VK_SUCCESS : VK_TIMEOUT;
3053                 }
3054
3055                 while(radv_get_current_time() <= timeout) {
3056                         for (uint32_t i = 0; i < fenceCount; ++i) {
3057                                 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
3058                                         return VK_SUCCESS;
3059                         }
3060                 }
3061                 return VK_TIMEOUT;
3062         }
3063
3064         for (uint32_t i = 0; i < fenceCount; ++i) {
3065                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3066                 bool expired = false;
3067
3068                 if (fence->temp_syncobj) {
3069                         if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
3070                                 return VK_TIMEOUT;
3071                         continue;
3072                 }
3073
3074                 if (fence->syncobj) {
3075                         if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
3076                                 return VK_TIMEOUT;
3077                         continue;
3078                 }
3079
3080                 if (fence->signalled)
3081                         continue;
3082
3083                 if (!fence->submitted) {
3084                         while(radv_get_current_time() <= timeout && !fence->submitted)
3085                                 /* Do nothing */;
3086
3087                         if (!fence->submitted)
3088                                 return VK_TIMEOUT;
3089
3090                         /* Recheck as it may have been set by submitting operations. */
3091                         if (fence->signalled)
3092                                 continue;
3093                 }
3094
3095                 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
3096                 if (!expired)
3097                         return VK_TIMEOUT;
3098
3099                 fence->signalled = true;
3100         }
3101
3102         return VK_SUCCESS;
3103 }
3104
3105 VkResult radv_ResetFences(VkDevice _device,
3106                           uint32_t fenceCount,
3107                           const VkFence *pFences)
3108 {
3109         RADV_FROM_HANDLE(radv_device, device, _device);
3110
3111         for (unsigned i = 0; i < fenceCount; ++i) {
3112                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3113                 fence->submitted = fence->signalled = false;
3114
3115                 /* Per spec, we first restore the permanent payload, and then reset, so
3116                  * having a temp syncobj should not skip resetting the permanent syncobj. */
3117                 if (fence->temp_syncobj) {
3118                         device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3119                         fence->temp_syncobj = 0;
3120                 }
3121
3122                 if (fence->syncobj) {
3123                         device->ws->reset_syncobj(device->ws, fence->syncobj);
3124                 }
3125         }
3126
3127         return VK_SUCCESS;
3128 }
3129
3130 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
3131 {
3132         RADV_FROM_HANDLE(radv_device, device, _device);
3133         RADV_FROM_HANDLE(radv_fence, fence, _fence);
3134
3135         if (fence->temp_syncobj) {
3136                         bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
3137                         return success ? VK_SUCCESS : VK_NOT_READY;
3138         }
3139
3140         if (fence->syncobj) {
3141                         bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
3142                         return success ? VK_SUCCESS : VK_NOT_READY;
3143         }
3144
3145         if (fence->signalled)
3146                 return VK_SUCCESS;
3147         if (!fence->submitted)
3148                 return VK_NOT_READY;
3149         if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3150                 return VK_NOT_READY;
3151
3152         return VK_SUCCESS;
3153 }
3154
3155
3156 // Queue semaphore functions
3157
3158 VkResult radv_CreateSemaphore(
3159         VkDevice                                    _device,
3160         const VkSemaphoreCreateInfo*                pCreateInfo,
3161         const VkAllocationCallbacks*                pAllocator,
3162         VkSemaphore*                                pSemaphore)
3163 {
3164         RADV_FROM_HANDLE(radv_device, device, _device);
3165         const VkExportSemaphoreCreateInfoKHR *export =
3166                 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
3167         VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
3168                 export ? export->handleTypes : 0;
3169
3170         struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3171                                                sizeof(*sem), 8,
3172                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3173         if (!sem)
3174                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3175
3176         sem->temp_syncobj = 0;
3177         /* create a syncobject if we are going to export this semaphore */
3178         if (device->always_use_syncobj || handleTypes) {
3179                 assert (device->physical_device->rad_info.has_syncobj);
3180                 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3181                 if (ret) {
3182                         vk_free2(&device->alloc, pAllocator, sem);
3183                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3184                 }
3185                 sem->sem = NULL;
3186         } else {
3187                 sem->sem = device->ws->create_sem(device->ws);
3188                 if (!sem->sem) {
3189                         vk_free2(&device->alloc, pAllocator, sem);
3190                         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3191                 }
3192                 sem->syncobj = 0;
3193         }
3194
3195         *pSemaphore = radv_semaphore_to_handle(sem);
3196         return VK_SUCCESS;
3197 }
3198
3199 void radv_DestroySemaphore(
3200         VkDevice                                    _device,
3201         VkSemaphore                                 _semaphore,
3202         const VkAllocationCallbacks*                pAllocator)
3203 {
3204         RADV_FROM_HANDLE(radv_device, device, _device);
3205         RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
3206         if (!_semaphore)
3207                 return;
3208
3209         if (sem->syncobj)
3210                 device->ws->destroy_syncobj(device->ws, sem->syncobj);
3211         else
3212                 device->ws->destroy_sem(sem->sem);
3213         vk_free2(&device->alloc, pAllocator, sem);
3214 }
3215
3216 VkResult radv_CreateEvent(
3217         VkDevice                                    _device,
3218         const VkEventCreateInfo*                    pCreateInfo,
3219         const VkAllocationCallbacks*                pAllocator,
3220         VkEvent*                                    pEvent)
3221 {
3222         RADV_FROM_HANDLE(radv_device, device, _device);
3223         struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
3224                                                sizeof(*event), 8,
3225                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3226
3227         if (!event)
3228                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3229
3230         event->bo = device->ws->buffer_create(device->ws, 8, 8,
3231                                               RADEON_DOMAIN_GTT,
3232                                               RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
3233         if (!event->bo) {
3234                 vk_free2(&device->alloc, pAllocator, event);
3235                 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3236         }
3237
3238         event->map = (uint64_t*)device->ws->buffer_map(event->bo);
3239
3240         *pEvent = radv_event_to_handle(event);
3241
3242         return VK_SUCCESS;
3243 }
3244
3245 void radv_DestroyEvent(
3246         VkDevice                                    _device,
3247         VkEvent                                     _event,
3248         const VkAllocationCallbacks*                pAllocator)
3249 {
3250         RADV_FROM_HANDLE(radv_device, device, _device);
3251         RADV_FROM_HANDLE(radv_event, event, _event);
3252
3253         if (!event)
3254                 return;
3255         device->ws->buffer_destroy(event->bo);
3256         vk_free2(&device->alloc, pAllocator, event);
3257 }
3258
3259 VkResult radv_GetEventStatus(
3260         VkDevice                                    _device,
3261         VkEvent                                     _event)
3262 {
3263         RADV_FROM_HANDLE(radv_event, event, _event);
3264
3265         if (*event->map == 1)
3266                 return VK_EVENT_SET;
3267         return VK_EVENT_RESET;
3268 }
3269
3270 VkResult radv_SetEvent(
3271         VkDevice                                    _device,
3272         VkEvent                                     _event)
3273 {
3274         RADV_FROM_HANDLE(radv_event, event, _event);
3275         *event->map = 1;
3276
3277         return VK_SUCCESS;
3278 }
3279
3280 VkResult radv_ResetEvent(
3281     VkDevice                                    _device,
3282     VkEvent                                     _event)
3283 {
3284         RADV_FROM_HANDLE(radv_event, event, _event);
3285         *event->map = 0;
3286
3287         return VK_SUCCESS;
3288 }
3289
3290 VkResult radv_CreateBuffer(
3291         VkDevice                                    _device,
3292         const VkBufferCreateInfo*                   pCreateInfo,
3293         const VkAllocationCallbacks*                pAllocator,
3294         VkBuffer*                                   pBuffer)
3295 {
3296         RADV_FROM_HANDLE(radv_device, device, _device);
3297         struct radv_buffer *buffer;
3298
3299         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
3300
3301         buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
3302                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3303         if (buffer == NULL)
3304                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3305
3306         buffer->size = pCreateInfo->size;
3307         buffer->usage = pCreateInfo->usage;
3308         buffer->bo = NULL;
3309         buffer->offset = 0;
3310         buffer->flags = pCreateInfo->flags;
3311
3312         buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
3313                                                  EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
3314
3315         if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
3316                 buffer->bo = device->ws->buffer_create(device->ws,
3317                                                        align64(buffer->size, 4096),
3318                                                        4096, 0, RADEON_FLAG_VIRTUAL);
3319                 if (!buffer->bo) {
3320                         vk_free2(&device->alloc, pAllocator, buffer);
3321                         return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3322                 }
3323         }
3324
3325         *pBuffer = radv_buffer_to_handle(buffer);
3326
3327         return VK_SUCCESS;
3328 }
3329
3330 void radv_DestroyBuffer(
3331         VkDevice                                    _device,
3332         VkBuffer                                    _buffer,
3333         const VkAllocationCallbacks*                pAllocator)
3334 {
3335         RADV_FROM_HANDLE(radv_device, device, _device);
3336         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3337
3338         if (!buffer)
3339                 return;
3340
3341         if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3342                 device->ws->buffer_destroy(buffer->bo);
3343
3344         vk_free2(&device->alloc, pAllocator, buffer);
3345 }
3346
3347 static inline unsigned
3348 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3349 {
3350         if (stencil)
3351                 return image->surface.u.legacy.stencil_tiling_index[level];
3352         else
3353                 return image->surface.u.legacy.tiling_index[level];
3354 }
3355
3356 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
3357 {
3358         return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
3359 }
3360
/*
 * Fill *cb with the color-buffer register values (cb_color_* / cb_dcc_*)
 * that describe the attachment view `iview`.
 *
 * `cb` is zeroed first, so every field that is not written below stays 0.
 * `device` is consulted for the chip class, the dedicated-VRAM flag and the
 * instance debug flags, which select the register layout and a few bits.
 */
3361 static void
3362 radv_initialise_color_surface(struct radv_device *device,
3363                               struct radv_color_buffer_info *cb,
3364                               struct radv_image_view *iview)
3365 {
3366         const struct vk_format_description *desc;
3367         unsigned ntype, format, swap, endian;
3368         unsigned blend_clamp = 0, blend_bypass = 0;
3369         uint64_t va;
3370         const struct radeon_surf *surf = &iview->image->surface;
3371
3372         desc = vk_format_description(iview->vk_format);
3373
3374         memset(cb, 0, sizeof(*cb));
3375
3376         /* Intensity is implemented as Red, so treat it that way. */
3377         cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3378
             /* Address of the image inside its BO; stored shifted right by 8
              * (i.e. in 256-byte units). */
3379         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3380
3381         cb->cb_color_base = va >> 8;
3382
3383         if (device->physical_device->rad_info.chip_class >= GFX9) {
                     /* GFX9+: the alignment flags come from the DCC metadata when
                      * the image has a DCC offset, otherwise from CMASK. */
3384                 struct gfx9_surf_meta_flags meta;
3385                 if (iview->image->dcc_offset)
3386                         meta = iview->image->surface.u.gfx9.dcc;
3387                 else
3388                         meta = iview->image->surface.u.gfx9.cmask;
3389
3390                 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3391                         S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
3392                         S_028C74_RB_ALIGNED(meta.rb_aligned) |
3393                         S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3394
3395                 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
3396                 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3397         } else {
                     /* Pre-GFX9: use the legacy per-level layout of the view's
                      * base mip level. */
3398                 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
3399                 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3400
3401                 cb->cb_color_base += level_info->offset >> 8;
                     /* The tile swizzle is only OR'd in for 2D-tiled levels. */
3402                 if (level_info->mode == RADEON_SURF_MODE_2D)
3403                         cb->cb_color_base |= iview->image->surface.tile_swizzle;
3404
3405                 pitch_tile_max = level_info->nblk_x / 8 - 1;
3406                 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
3407                 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
3408
3409                 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3410                 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3411                 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
3412
3413                 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3414
3415                 if (iview->image->fmask.size) {
3416                         if (device->physical_device->rad_info.chip_class >= CIK)
3417                                 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
3418                         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
3419                         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
3420                 } else {
3421                         /* This must be set for fast clear to work without FMASK. */
3422                         if (device->physical_device->rad_info.chip_class >= CIK)
3423                                 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3424                         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3425                         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3426                 }
3427         }
3428
3429         /* CMASK variables */
3430         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3431         va += iview->image->cmask.offset;
3432         cb->cb_color_cmask = va >> 8;
3433
             /* DCC base: image address plus the DCC offset, with the surface
              * tile swizzle OR'd into the low bits. */
3434         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3435         va += iview->image->dcc_offset;
3436         cb->cb_dcc_base = va >> 8;
3437         cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
3438
             /* Slice range covered by the view (inclusive maximum). */
3439         uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3440         cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
3441                 S_028C6C_SLICE_MAX(max_slice);
3442
3443         if (iview->image->info.samples > 1) {
3444                 unsigned log_samples = util_logbase2(iview->image->info.samples);
3445
3446                 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
3447                         S_028C74_NUM_FRAGMENTS(log_samples);
3448         }
3449
             /* Without FMASK the FMASK base simply aliases the color base. */
3450         if (iview->image->fmask.size) {
3451                 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
3452                 cb->cb_color_fmask = va >> 8;
3453                 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
3454         } else {
3455                 cb->cb_color_fmask = cb->cb_color_base;
3456         }
3457
             /* Translate the view format into hardware number type, color
              * format, component swap and endian swap. */
3458         ntype = radv_translate_color_numformat(iview->vk_format,
3459                                                desc,
3460                                                vk_format_get_first_non_void_channel(iview->vk_format));
3461         format = radv_translate_colorformat(iview->vk_format);
3462         if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3463                 radv_finishme("Illegal color\n");
3464         swap = radv_translate_colorswap(iview->vk_format, FALSE);
3465         endian = radv_colorformat_endian_swap(format);
3466
3467         /* blend clamp should be set for all NORM/SRGB types */
3468         if (ntype == V_028C70_NUMBER_UNORM ||
3469             ntype == V_028C70_NUMBER_SNORM ||
3470             ntype == V_028C70_NUMBER_SRGB)
3471                 blend_clamp = 1;
3472
3473         /* set blend bypass according to docs if SINT/UINT or
3474            8/24 COLOR variants */
3475         if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3476             format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3477             format == V_028C70_COLOR_X24_8_32_FLOAT) {
3478                 blend_clamp = 0;
3479                 blend_bypass = 1;
3480         }
     /* Disabled code.  Note that it would not compile if enabled: the object
      * before `->color_is_int8` is missing. */
3481 #if 0
3482         if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3483             (format == V_028C70_COLOR_8 ||
3484              format == V_028C70_COLOR_8_8 ||
3485              format == V_028C70_COLOR_8_8_8_8))
3486                 ->color_is_int8 = true;
3487 #endif
             /* ROUND_MODE is set only when the number type is none of
              * UNORM/SNORM/SRGB and the format is neither 8_24 nor 24_8. */
3488         cb->cb_color_info = S_028C70_FORMAT(format) |
3489                 S_028C70_COMP_SWAP(swap) |
3490                 S_028C70_BLEND_CLAMP(blend_clamp) |
3491                 S_028C70_BLEND_BYPASS(blend_bypass) |
3492                 S_028C70_SIMPLE_FLOAT(1) |
3493                 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3494                                     ntype != V_028C70_NUMBER_SNORM &&
3495                                     ntype != V_028C70_NUMBER_SRGB &&
3496                                     format != V_028C70_COLOR_8_24 &&
3497                                     format != V_028C70_COLOR_24_8) |
3498                 S_028C70_NUMBER_TYPE(ntype) |
3499                 S_028C70_ENDIAN(endian);
             /* Multisampled images with FMASK enable compression; on SI the
              * FMASK bank height is also programmed. */
3500         if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
3501                 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3502                 if (device->physical_device->rad_info.chip_class == SI) {
3503                         unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3504                         cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3505                 }
3506         }
3507
             /* Fast clear needs CMASK and can be disabled via a debug flag. */
3508         if (iview->image->cmask.size &&
3509             !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3510                 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3511
3512         if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
3513                 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3514
             /* DCC control word (VI and newer only).  max_compressed_block_size
              * is assigned on both arms of the usage check below. */
3515         if (device->physical_device->rad_info.chip_class >= VI) {
3516                 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3517                 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
3518                 unsigned independent_64b_blocks = 0;
3519                 unsigned max_compressed_block_size;
3520
3521                 /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
3522                    64 for APU because all of our APUs to date use DIMMs which have
3523                    a request granularity size of 64B while all other chips have a
3524                    32B request size */
3525                 if (!device->physical_device->rad_info.has_dedicated_vram)
3526                         min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
3527
3528                 if (iview->image->info.samples > 1) {
3529                         if (iview->image->surface.bpe == 1)
3530                                 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3531                         else if (iview->image->surface.bpe == 2)
3532                                 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
3533                 }
3534
3535                 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3536                                            VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
3537                         independent_64b_blocks = 1;
3538                         max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3539                 } else
3540                         max_compressed_block_size = max_uncompressed_block_size;
3541
3542                 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3543                         S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
3544                         S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
3545                         S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
3546         }
3547
3548         /* This must be set for fast clear to work without FMASK. */
3549         if (!iview->image->fmask.size &&
3550             device->physical_device->rad_info.chip_class == SI) {
3551                 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3552                 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3553         }
3554
             /* GFX9+ additionally encodes the view's mip level, the mip0
              * width/height/depth (each minus one), the resource type and the
              * highest mip index. */
3555         if (device->physical_device->rad_info.chip_class >= GFX9) {
3556                 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3557                   (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3558
3559                 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3560                 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3561                         S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3562                 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3563                         S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3564                         S_028C68_MAX_MIP(iview->image->info.levels - 1);
3565         }
3566 }
3567
/*
 * Fill *ds with the depth/stencil register values (db_* and the polygon
 * offset format control) that describe the attachment view `iview`.
 *
 * `ds` is zeroed first, so every field that is not written below stays 0.
 * The chip class of `device` selects between the GFX9+ layout and the
 * legacy (pre-GFX9) layout.
 */
3568 static void
3569 radv_initialise_ds_surface(struct radv_device *device,
3570                            struct radv_ds_buffer_info *ds,
3571                            struct radv_image_view *iview)
3572 {
3573         unsigned level = iview->base_mip;
3574         unsigned format, stencil_format;
3575         uint64_t va, s_offs, z_offs;
3576         bool stencil_only = false;
3577         memset(ds, 0, sizeof(*ds));
             /* Per-format polygon offset setup.  S8_UINT only marks the image
              * as stencil-only; any other format leaves the zeroed defaults. */
3578         switch (iview->image->vk_format) {
3579         case VK_FORMAT_D24_UNORM_S8_UINT:
3580         case VK_FORMAT_X8_D24_UNORM_PACK32:
3581                 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3582                 ds->offset_scale = 2.0f;
3583                 break;
3584         case VK_FORMAT_D16_UNORM:
3585         case VK_FORMAT_D16_UNORM_S8_UINT:
3586                 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3587                 ds->offset_scale = 4.0f;
3588                 break;
3589         case VK_FORMAT_D32_SFLOAT:
3590         case VK_FORMAT_D32_SFLOAT_S8_UINT:
3591                 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3592                         S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3593                 ds->offset_scale = 1.0f;
3594                 break;
3595         case VK_FORMAT_S8_UINT:
3596                 stencil_only = true;
3597                 break;
3598         default:
3599                 break;
3600         }
3601
3602         format = radv_translate_dbformat(iview->image->vk_format);
3603         stencil_format = iview->image->surface.has_stencil ?
3604                 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3605
             /* Slice range covered by the view (inclusive maximum). */
3606         uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3607         ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3608                 S_028008_SLICE_MAX(max_slice);
3609
3610         ds->db_htile_data_base = 0;
3611         ds->db_htile_surface = 0;
3612
             /* Depth and stencil addresses both start at the image address and
              * get their own offsets added in the branches below. */
3613         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3614         s_offs = z_offs = va;
3615
3616         if (device->physical_device->rad_info.chip_class >= GFX9) {
3617                 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3618                 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3619
3620                 ds->db_z_info = S_028038_FORMAT(format) |
3621                         S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3622                         S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3623                         S_028038_MAXMIP(iview->image->info.levels - 1);
3624                 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3625                         S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3626
3627                 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3628                 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3629                 ds->db_depth_view |= S_028008_MIPID(level);
3630
3631                 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3632                         S_02801C_Y_MAX(iview->image->info.height - 1);
3633
3634                 if (radv_htile_enabled(iview->image, level)) {
3635                         ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3636
3637                         if (iview->image->tc_compatible_htile) {
3638                                 unsigned max_zplanes = 4;
3639
3640                                 if (iview->vk_format == VK_FORMAT_D16_UNORM  &&
3641                                     iview->image->info.samples > 1)
3642                                         max_zplanes = 2;
3643
3644                                 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
3645                                           S_028038_ITERATE_FLUSH(1);
3646                                 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3647                         }
3648
3649                         if (!iview->image->surface.has_stencil)
3650                                 /* Use all of the htile_buffer for depth if there's no stencil. */
3651                                 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3652                         va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3653                                 iview->image->htile_offset;
3654                         ds->db_htile_data_base = va >> 8;
3655                         ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3656                                 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3657                                 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3658                 }
3659         } else {
                     /* Pre-GFX9: dimensions come from the legacy level info of
                      * the base mip; stencil-only images use the stencil level. */
3660                 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3661
3662                 if (stencil_only)
3663                         level_info = &iview->image->surface.u.legacy.stencil_level[level];
3664
3665                 z_offs += iview->image->surface.u.legacy.level[level].offset;
3666                 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3667
3668                 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
3669                 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3670                 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3671
3672                 if (iview->image->info.samples > 1)
3673                         ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3674
3675                 if (device->physical_device->rad_info.chip_class >= CIK) {
                             /* CIK+: look up the tile and macrotile mode words
                              * and copy their fields into the DB registers. */
3676                         struct radeon_info *info = &device->physical_device->rad_info;
3677                         unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3678                         unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3679                         unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3680                         unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3681                         unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3682                         unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3683
3684                         if (stencil_only)
3685                                 tile_mode = stencil_tile_mode;
3686
3687                         ds->db_depth_info |=
3688                                 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3689                                 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3690                                 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3691                                 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3692                                 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3693                                 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3694                         ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3695                         ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3696                 } else {
                             /* Older chips take the tile mode indices directly. */
3697                         unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3698                         ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3699                         tile_mode_index = si_tile_mode_index(iview->image, level, true);
3700                         ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
                             /* NOTE(review): for stencil-only images this ORs the
                              * stencil index on top of the depth index already
                              * written above rather than replacing it - confirm
                              * this is intended. */
3701                         if (stencil_only)
3702                                 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3703                 }
3704
3705                 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3706                         S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3707                 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3708
3709                 if (radv_htile_enabled(iview->image, level)) {
3710                         ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3711
3712                         if (!iview->image->surface.has_stencil &&
3713                             !iview->image->tc_compatible_htile)
3714                                 /* Use all of the htile_buffer for depth if there's no stencil. */
3715                                 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3716
3717                         va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3718                                 iview->image->htile_offset;
3719                         ds->db_htile_data_base = va >> 8;
3720                         ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3721
                             /* TC-compatible HTILE: the DECOMPRESS_ON_N_ZPLANES
                              * value depends on the sample count. */
3722                         if (iview->image->tc_compatible_htile) {
3723                                 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3724
3725                                 if (iview->image->info.samples <= 1)
3726                                         ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
3727                                 else if (iview->image->info.samples <= 4)
3728                                         ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
3729                                 else
3730                                         ds->db_z_info|= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
3731                         }
3732                 }
3733         }
3734
             /* Read and write bases share one address each, stored shifted
              * right by 8 (256-byte units). */
3735         ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3736         ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3737 }
3738
3739 VkResult radv_CreateFramebuffer(
3740         VkDevice                                    _device,
3741         const VkFramebufferCreateInfo*              pCreateInfo,
3742         const VkAllocationCallbacks*                pAllocator,
3743         VkFramebuffer*                              pFramebuffer)
3744 {
3745         RADV_FROM_HANDLE(radv_device, device, _device);
3746         struct radv_framebuffer *framebuffer;
3747
3748         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3749
3750         size_t size = sizeof(*framebuffer) +
3751                 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3752         framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3753                                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3754         if (framebuffer == NULL)
3755                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3756
3757         framebuffer->attachment_count = pCreateInfo->attachmentCount;
3758         framebuffer->width = pCreateInfo->width;
3759         framebuffer->height = pCreateInfo->height;
3760         framebuffer->layers = pCreateInfo->layers;
3761         for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3762                 VkImageView _iview = pCreateInfo->pAttachments[i];
3763                 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3764                 framebuffer->attachments[i].attachment = iview;
3765                 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3766                         radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3767                 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3768                         radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3769                 }
3770                 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3771                 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3772                 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
3773         }
3774
3775         *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3776         return VK_SUCCESS;
3777 }
3778
3779 void radv_DestroyFramebuffer(
3780         VkDevice                                    _device,
3781         VkFramebuffer                               _fb,
3782         const VkAllocationCallbacks*                pAllocator)
3783 {
3784         RADV_FROM_HANDLE(radv_device, device, _device);
3785         RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3786
3787         if (!fb)
3788                 return;
3789         vk_free2(&device->alloc, pAllocator, fb);
3790 }
3791
3792 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3793 {
3794         switch (address_mode) {
3795         case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3796                 return V_008F30_SQ_TEX_WRAP;
3797         case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3798                 return V_008F30_SQ_TEX_MIRROR;
3799         case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3800                 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3801         case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3802                 return V_008F30_SQ_TEX_CLAMP_BORDER;
3803         case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3804                 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3805         default:
3806                 unreachable("illegal tex wrap mode");
3807                 break;
3808         }
3809 }
3810
3811 static unsigned
3812 radv_tex_compare(VkCompareOp op)
3813 {
3814         switch (op) {
3815         case VK_COMPARE_OP_NEVER:
3816                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3817         case VK_COMPARE_OP_LESS:
3818                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3819         case VK_COMPARE_OP_EQUAL:
3820                 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3821         case VK_COMPARE_OP_LESS_OR_EQUAL:
3822                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3823         case VK_COMPARE_OP_GREATER:
3824                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3825         case VK_COMPARE_OP_NOT_EQUAL:
3826                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3827         case VK_COMPARE_OP_GREATER_OR_EQUAL:
3828                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3829         case VK_COMPARE_OP_ALWAYS:
3830                 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3831         default:
3832                 unreachable("illegal compare mode");
3833                 break;
3834         }
3835 }
3836
3837 static unsigned
3838 radv_tex_filter(VkFilter filter, unsigned max_ansio)
3839 {
3840         switch (filter) {
3841         case VK_FILTER_NEAREST:
3842                 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3843                         V_008F38_SQ_TEX_XY_FILTER_POINT);
3844         case VK_FILTER_LINEAR:
3845                 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3846                         V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3847         case VK_FILTER_CUBIC_IMG:
3848         default:
3849                 fprintf(stderr, "illegal texture filter");
3850                 return 0;
3851         }
3852 }
3853
3854 static unsigned
3855 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3856 {
3857         switch (mode) {
3858         case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3859                 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3860         case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3861                 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3862         default:
3863                 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3864         }
3865 }
3866
3867 static unsigned
3868 radv_tex_bordercolor(VkBorderColor bcolor)
3869 {
3870         switch (bcolor) {
3871         case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3872         case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3873                 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3874         case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3875         case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3876                 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3877         case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3878         case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3879                 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3880         default:
3881                 break;
3882         }
3883         return 0;
3884 }
3885
/*
 * Map a maximum anisotropy value to a 0..4 ratio code:
 * <2 -> 0, 2..3 -> 1, 4..7 -> 2, 8..15 -> 3, >=16 -> 4.
 */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
        unsigned code = 0;

        /* Count how many of the thresholds 2, 4, 8, 16 the value reaches. */
        for (unsigned threshold = 2; code < 4 && filter >= threshold; threshold <<= 1)
                code++;

        return code;
}
3899
3900 static void
3901 radv_init_sampler(struct radv_device *device,
3902                   struct radv_sampler *sampler,
3903                   const VkSamplerCreateInfo *pCreateInfo)
3904 {
3905         uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3906                                         (uint32_t) pCreateInfo->maxAnisotropy : 0;
3907         uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3908         bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3909
3910         sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3911                              S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3912                              S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3913                              S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3914                              S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3915                              S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3916                              S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3917                              S_008F30_ANISO_BIAS(max_aniso_ratio) |
3918                              S_008F30_DISABLE_CUBE_WRAP(0) |
3919                              S_008F30_COMPAT_MODE(is_vi));
3920         sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3921                              S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3922                              S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3923         sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3924                              S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3925                              S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3926                              S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3927                              S_008F38_MIP_POINT_PRECLAMP(0) |
3928                              S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
3929                              S_008F38_FILTER_PREC_FIX(1) |
3930                              S_008F38_ANISO_OVERRIDE(is_vi));
3931         sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3932                              S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3933 }
3934
3935 VkResult radv_CreateSampler(
3936         VkDevice                                    _device,
3937         const VkSamplerCreateInfo*                  pCreateInfo,
3938         const VkAllocationCallbacks*                pAllocator,
3939         VkSampler*                                  pSampler)
3940 {
3941         RADV_FROM_HANDLE(radv_device, device, _device);
3942         struct radv_sampler *sampler;
3943
3944         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3945
3946         sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3947                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3948         if (!sampler)
3949                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3950
3951         radv_init_sampler(device, sampler, pCreateInfo);
3952         *pSampler = radv_sampler_to_handle(sampler);
3953
3954         return VK_SUCCESS;
3955 }
3956
3957 void radv_DestroySampler(
3958         VkDevice                                    _device,
3959         VkSampler                                   _sampler,
3960         const VkAllocationCallbacks*                pAllocator)
3961 {
3962         RADV_FROM_HANDLE(radv_device, device, _device);
3963         RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3964
3965         if (!sampler)
3966                 return;
3967         vk_free2(&device->alloc, pAllocator, sampler);
3968 }
3969
3970 /* vk_icd.h does not declare this function, so we declare it here to
3971  * suppress Wmissing-prototypes.
3972  */
3973 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3974 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3975
3976 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3977 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3978 {
3979         /* For the full details on loader interface versioning, see
3980         * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3981         * What follows is a condensed summary, to help you navigate the large and
3982         * confusing official doc.
3983         *
3984         *   - Loader interface v0 is incompatible with later versions. We don't
3985         *     support it.
3986         *
3987         *   - In loader interface v1:
3988         *       - The first ICD entrypoint called by the loader is
3989         *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3990         *         entrypoint.
3991         *       - The ICD must statically expose no other Vulkan symbol unless it is
3992         *         linked with -Bsymbolic.
3993         *       - Each dispatchable Vulkan handle created by the ICD must be
3994         *         a pointer to a struct whose first member is VK_LOADER_DATA. The
3995         *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3996         *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3997         *         vkDestroySurfaceKHR(). The ICD must be capable of working with
3998         *         such loader-managed surfaces.
3999         *
4000         *    - Loader interface v2 differs from v1 in:
4001         *       - The first ICD entrypoint called by the loader is
4002         *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4003         *         statically expose this entrypoint.
4004         *
4005         *    - Loader interface v3 differs from v2 in:
4006         *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
4007         *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
4008         *          because the loader no longer does so.
4009         */
4010         *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
4011         return VK_SUCCESS;
4012 }
4013
4014 VkResult radv_GetMemoryFdKHR(VkDevice _device,
4015                              const VkMemoryGetFdInfoKHR *pGetFdInfo,
4016                              int *pFD)
4017 {
4018         RADV_FROM_HANDLE(radv_device, device, _device);
4019         RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
4020
4021         assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4022
4023         /* At the moment, we support only the below handle types. */
4024         assert(pGetFdInfo->handleType ==
4025                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4026                pGetFdInfo->handleType ==
4027                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4028
4029         bool ret = radv_get_memory_fd(device, memory, pFD);
4030         if (ret == false)
4031                 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
4032         return VK_SUCCESS;
4033 }
4034
4035 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
4036                                        VkExternalMemoryHandleTypeFlagBitsKHR handleType,
4037                                        int fd,
4038                                        VkMemoryFdPropertiesKHR *pMemoryFdProperties)
4039 {
4040    switch (handleType) {
4041    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4042       pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
4043       return VK_SUCCESS;
4044
4045    default:
4046       /* The valid usage section for this function says:
4047        *
4048        *    "handleType must not be one of the handle types defined as
4049        *    opaque."
4050        *
4051        * So opaque handle types fall into the default "unsupported" case.
4052        */
4053       return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4054    }
4055 }
4056
4057 static VkResult radv_import_opaque_fd(struct radv_device *device,
4058                                       int fd,
4059                                       uint32_t *syncobj)
4060 {
4061         uint32_t syncobj_handle = 0;
4062         int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
4063         if (ret != 0)
4064                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4065
4066         if (*syncobj)
4067                 device->ws->destroy_syncobj(device->ws, *syncobj);
4068
4069         *syncobj = syncobj_handle;
4070         close(fd);
4071
4072         return VK_SUCCESS;
4073 }
4074
4075 static VkResult radv_import_sync_fd(struct radv_device *device,
4076                                     int fd,
4077                                     uint32_t *syncobj)
4078 {
4079         /* If we create a syncobj we do it locally so that if we have an error, we don't
4080          * leave a syncobj in an undetermined state in the fence. */
4081         uint32_t syncobj_handle =  *syncobj;
4082         if (!syncobj_handle) {
4083                 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
4084                 if (ret) {
4085                         return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4086                 }
4087         }
4088
4089         if (fd == -1) {
4090                 device->ws->signal_syncobj(device->ws, syncobj_handle);
4091         } else {
4092                 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
4093         if (ret != 0)
4094                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4095         }
4096
4097         *syncobj = syncobj_handle;
4098         if (fd != -1)
4099                 close(fd);
4100
4101         return VK_SUCCESS;
4102 }
4103
4104 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
4105                                    const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
4106 {
4107         RADV_FROM_HANDLE(radv_device, device, _device);
4108         RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
4109         uint32_t *syncobj_dst = NULL;
4110
4111         if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
4112                 syncobj_dst = &sem->temp_syncobj;
4113         } else {
4114                 syncobj_dst = &sem->syncobj;
4115         }
4116
4117         switch(pImportSemaphoreFdInfo->handleType) {
4118                 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4119                         return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4120                 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4121                         return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4122                 default:
4123                         unreachable("Unhandled semaphore handle type");
4124         }
4125 }
4126
4127 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
4128                                 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
4129                                 int *pFd)
4130 {
4131         RADV_FROM_HANDLE(radv_device, device, _device);
4132         RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
4133         int ret;
4134         uint32_t syncobj_handle;
4135
4136         if (sem->temp_syncobj)
4137                 syncobj_handle = sem->temp_syncobj;
4138         else
4139                 syncobj_handle = sem->syncobj;
4140
4141         switch(pGetFdInfo->handleType) {
4142         case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4143                 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4144                 break;
4145         case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4146                 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4147                 if (!ret) {
4148                         if (sem->temp_syncobj) {
4149                                 close (sem->temp_syncobj);
4150                                 sem->temp_syncobj = 0;
4151                         } else {
4152                                 device->ws->reset_syncobj(device->ws, syncobj_handle);
4153                         }
4154                 }
4155                 break;
4156         default:
4157                 unreachable("Unhandled semaphore handle type");
4158         }
4159
4160         if (ret)
4161                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4162         return VK_SUCCESS;
4163 }
4164
4165 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
4166         VkPhysicalDevice                            physicalDevice,
4167         const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
4168         VkExternalSemaphorePropertiesKHR*           pExternalSemaphoreProperties)
4169 {
4170         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4171
4172         /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
4173         if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4174             (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || 
4175              pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4176                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4177                 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4178                 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4179                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4180         } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
4181                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4182                 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4183                 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4184                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4185         } else {
4186                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
4187                 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
4188                 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
4189         }
4190 }
4191
4192 VkResult radv_ImportFenceFdKHR(VkDevice _device,
4193                                    const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
4194 {
4195         RADV_FROM_HANDLE(radv_device, device, _device);
4196         RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
4197         uint32_t *syncobj_dst = NULL;
4198
4199
4200         if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
4201                 syncobj_dst = &fence->temp_syncobj;
4202         } else {
4203                 syncobj_dst = &fence->syncobj;
4204         }
4205
4206         switch(pImportFenceFdInfo->handleType) {
4207                 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4208                         return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4209                 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4210                         return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4211                 default:
4212                         unreachable("Unhandled fence handle type");
4213         }
4214 }
4215
4216 VkResult radv_GetFenceFdKHR(VkDevice _device,
4217                                 const VkFenceGetFdInfoKHR *pGetFdInfo,
4218                                 int *pFd)
4219 {
4220         RADV_FROM_HANDLE(radv_device, device, _device);
4221         RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
4222         int ret;
4223         uint32_t syncobj_handle;
4224
4225         if (fence->temp_syncobj)
4226                 syncobj_handle = fence->temp_syncobj;
4227         else
4228                 syncobj_handle = fence->syncobj;
4229
4230         switch(pGetFdInfo->handleType) {
4231         case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4232                 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4233                 break;
4234         case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4235                 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4236                 if (!ret) {
4237                         if (fence->temp_syncobj) {
4238                                 close (fence->temp_syncobj);
4239                                 fence->temp_syncobj = 0;
4240                         } else {
4241                                 device->ws->reset_syncobj(device->ws, syncobj_handle);
4242                         }
4243                 }
4244                 break;
4245         default:
4246                 unreachable("Unhandled fence handle type");
4247         }
4248
4249         if (ret)
4250                 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4251         return VK_SUCCESS;
4252 }
4253
4254 void radv_GetPhysicalDeviceExternalFenceProperties(
4255         VkPhysicalDevice                            physicalDevice,
4256         const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
4257         VkExternalFencePropertiesKHR*           pExternalFenceProperties)
4258 {
4259         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4260
4261         if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4262             (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || 
4263              pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4264                 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4265                 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4266                 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
4267                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4268         } else {
4269                 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
4270                 pExternalFenceProperties->compatibleHandleTypes = 0;
4271                 pExternalFenceProperties->externalFenceFeatures = 0;
4272         }
4273 }
4274
4275 VkResult
4276 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
4277                                  const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
4278                                  const VkAllocationCallbacks* pAllocator,
4279                                  VkDebugReportCallbackEXT* pCallback)
4280 {
4281         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4282         return vk_create_debug_report_callback(&instance->debug_report_callbacks,
4283                                                pCreateInfo, pAllocator, &instance->alloc,
4284                                                pCallback);
4285 }
4286
4287 void
4288 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
4289                                   VkDebugReportCallbackEXT _callback,
4290                                   const VkAllocationCallbacks* pAllocator)
4291 {
4292         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4293         vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
4294                                          _callback, pAllocator, &instance->alloc);
4295 }
4296
4297 void
4298 radv_DebugReportMessageEXT(VkInstance _instance,
4299                           VkDebugReportFlagsEXT flags,
4300                           VkDebugReportObjectTypeEXT objectType,
4301                           uint64_t object,
4302                           size_t location,
4303                           int32_t messageCode,
4304                           const char* pLayerPrefix,
4305                           const char* pMessage)
4306 {
4307         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4308         vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
4309                         object, location, messageCode, pLayerPrefix, pMessage);
4310 }
4311
4312 void
4313 radv_GetDeviceGroupPeerMemoryFeatures(
4314     VkDevice                                    device,
4315     uint32_t                                    heapIndex,
4316     uint32_t                                    localDeviceIndex,
4317     uint32_t                                    remoteDeviceIndex,
4318     VkPeerMemoryFeatureFlags*                   pPeerMemoryFeatures)
4319 {
4320         assert(localDeviceIndex == remoteDeviceIndex);
4321
4322         *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
4323                                VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
4324                                VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
4325                                VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
4326 }