OSDN Git Service

cc88abb57a897f7a6758761e2fe761495ddae9d5
[android-x86/external-mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_shader.h"
35 #include "radv_cs.h"
36 #include "util/disk_cache.h"
37 #include "util/strtod.h"
38 #include "vk_util.h"
39 #include <xf86drm.h>
40 #include <amdgpu.h>
41 #include <amdgpu_drm.h>
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "gfx9d.h"
47 #include "addrlib/gfx9/chip/gfx9_enum.h"
48 #include "util/debug.h"
49
50 static int
51 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
52 {
53         uint32_t mesa_timestamp, llvm_timestamp;
54         uint16_t f = family;
55         memset(uuid, 0, VK_UUID_SIZE);
56         if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
57             !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
58                 return -1;
59
60         memcpy(uuid, &mesa_timestamp, 4);
61         memcpy((char*)uuid + 4, &llvm_timestamp, 4);
62         memcpy((char*)uuid + 8, &f, 2);
63         snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
64         return 0;
65 }
66
67 static void
68 radv_get_driver_uuid(void *uuid)
69 {
70         ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
71 }
72
73 static void
74 radv_get_device_uuid(struct radeon_info *info, void *uuid)
75 {
76         ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
77 }
78
79 static void
80 radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
81 {
82         const char *chip_string;
83         char llvm_string[32] = {};
84
85         switch (family) {
86         case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
87         case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
88         case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
89         case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
90         case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
91         case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
92         case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
93         case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
94         case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
95         case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
96         case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
97         case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
98         case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
99         case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
100         case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
101         case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
102         case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
103         case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
104         case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
105         case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
106         case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
107         case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
108         default: chip_string = "AMD RADV unknown"; break;
109         }
110
111         snprintf(llvm_string, sizeof(llvm_string),
112                  " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
113                  HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
114         snprintf(name, name_len, "%s%s", chip_string, llvm_string);
115 }
116
117 static void
118 radv_physical_device_init_mem_types(struct radv_physical_device *device)
119 {
120         STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
121         uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
122                                           device->rad_info.vram_vis_size);
123
124         int vram_index = -1, visible_vram_index = -1, gart_index = -1;
125         device->memory_properties.memoryHeapCount = 0;
126         if (device->rad_info.vram_size - visible_vram_size > 0) {
127                 vram_index = device->memory_properties.memoryHeapCount++;
128                 device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
129                         .size = device->rad_info.vram_size - visible_vram_size,
130                         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
131                 };
132         }
133         if (visible_vram_size) {
134                 visible_vram_index = device->memory_properties.memoryHeapCount++;
135                 device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
136                         .size = visible_vram_size,
137                         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
138                 };
139         }
140         if (device->rad_info.gart_size > 0) {
141                 gart_index = device->memory_properties.memoryHeapCount++;
142                 device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
143                         .size = device->rad_info.gart_size,
144                         .flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
145                 };
146         }
147
148         STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
149         unsigned type_count = 0;
150         if (vram_index >= 0) {
151                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
152                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
153                         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
154                         .heapIndex = vram_index,
155                 };
156         }
157         if (gart_index >= 0) {
158                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
159                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
160                         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
161                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
162                         (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
163                         .heapIndex = gart_index,
164                 };
165         }
166         if (visible_vram_index >= 0) {
167                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
168                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
169                         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
170                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
171                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
172                         .heapIndex = visible_vram_index,
173                 };
174         }
175         if (gart_index >= 0) {
176                 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
177                 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
178                         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
179                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
180                         VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
181                         (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
182                         .heapIndex = gart_index,
183                 };
184         }
185         device->memory_properties.memoryTypeCount = type_count;
186 }
187
188 static void
189 radv_handle_env_var_force_family(struct radv_physical_device *device)
190 {
191         const char *family = getenv("RADV_FORCE_FAMILY");
192         unsigned i;
193
194         if (!family)
195                 return;
196
197         for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
198                 if (!strcmp(family, ac_get_llvm_processor_name(i))) {
199                         /* Override family and chip_class. */
200                         device->rad_info.family = i;
201
202                         if (i >= CHIP_VEGA10)
203                                 device->rad_info.chip_class = GFX9;
204                         else if (i >= CHIP_TONGA)
205                                 device->rad_info.chip_class = VI;
206                         else if (i >= CHIP_BONAIRE)
207                                 device->rad_info.chip_class = CIK;
208                         else
209                                 device->rad_info.chip_class = SI;
210
211                         return;
212                 }
213         }
214
215         fprintf(stderr, "radv: Unknown family: %s\n", family);
216         exit(1);
217 }
218
219 static VkResult
220 radv_physical_device_init(struct radv_physical_device *device,
221                           struct radv_instance *instance,
222                           drmDevicePtr drm_device)
223 {
224         const char *path = drm_device->nodes[DRM_NODE_RENDER];
225         VkResult result;
226         drmVersionPtr version;
227         int fd;
228         int master_fd = -1;
229
230         fd = open(path, O_RDWR | O_CLOEXEC);
231         if (fd < 0) {
232                 if (instance->debug_flags & RADV_DEBUG_STARTUP)
233                         radv_logi("Could not open device '%s'", path);
234
235                 return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
236         }
237
238         version = drmGetVersion(fd);
239         if (!version) {
240                 close(fd);
241
242                 if (instance->debug_flags & RADV_DEBUG_STARTUP)
243                         radv_logi("Could not get the kernel driver version for device '%s'", path);
244
245                 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
246                                  "failed to get version %s: %m", path);
247         }
248
249         if (strcmp(version->name, "amdgpu")) {
250                 drmFreeVersion(version);
251                 if (master_fd != -1)
252                         close(master_fd);
253                 close(fd);
254
255                 if (instance->debug_flags & RADV_DEBUG_STARTUP)
256                         radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);
257
258                 return VK_ERROR_INCOMPATIBLE_DRIVER;
259         }
260         drmFreeVersion(version);
261
262         if (instance->debug_flags & RADV_DEBUG_STARTUP)
263                         radv_logi("Found compatible device '%s'.", path);
264
265         device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
266         device->instance = instance;
267         assert(strlen(path) < ARRAY_SIZE(device->path));
268         strncpy(device->path, path, ARRAY_SIZE(device->path));
269
270         device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
271                                                instance->perftest_flags);
272         if (!device->ws) {
273                 result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
274                 goto fail;
275         }
276
277         if (instance->enabled_extensions.KHR_display) {
278                 master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
279                 if (master_fd >= 0) {
280                         uint32_t accel_working = 0;
281                         struct drm_amdgpu_info request = {
282                                 .return_pointer = (uintptr_t)&accel_working,
283                                 .return_size = sizeof(accel_working),
284                                 .query = AMDGPU_INFO_ACCEL_WORKING
285                         };
286
287                         if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) {
288                                 close(master_fd);
289                                 master_fd = -1;
290                         }
291                 }
292         }
293
294         device->master_fd = master_fd;
295         device->local_fd = fd;
296         device->ws->query_info(device->ws, &device->rad_info);
297
298         radv_handle_env_var_force_family(device);
299
300         radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
301
302         if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
303                 device->ws->destroy(device->ws);
304                 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
305                                    "cannot generate UUID");
306                 goto fail;
307         }
308
309         /* These flags affect shader compilation. */
310         uint64_t shader_env_flags =
311                 (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
312                 (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
313
314         /* The gpu id is already embedded in the uuid so we just pass "radv"
315          * when creating the cache.
316          */
317         char buf[VK_UUID_SIZE * 2 + 1];
318         disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
319         device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
320
321         if (device->rad_info.chip_class < VI ||
322             device->rad_info.chip_class > GFX9)
323                 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
324
325         radv_get_driver_uuid(&device->device_uuid);
326         radv_get_device_uuid(&device->rad_info, &device->device_uuid);
327
328         if (device->rad_info.family == CHIP_STONEY ||
329             device->rad_info.chip_class >= GFX9) {
330                 device->has_rbplus = true;
331                 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
332                                          device->rad_info.family == CHIP_VEGA12 ||
333                                          device->rad_info.family == CHIP_RAVEN;
334         }
335
336         /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
337          * on SI.
338          */
339         device->has_clear_state = device->rad_info.chip_class >= CIK;
340
341         device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;
342
343         /* Vega10/Raven need a special workaround for a hardware bug. */
344         device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
345                                   device->rad_info.family == CHIP_RAVEN;
346
347         /* Out-of-order primitive rasterization. */
348         device->has_out_of_order_rast = device->rad_info.chip_class >= VI &&
349                                         device->rad_info.max_se >= 2;
350         device->out_of_order_rast_allowed = device->has_out_of_order_rast &&
351                                             !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);
352
353         device->dcc_msaa_allowed =
354                 (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
355
356         radv_physical_device_init_mem_types(device);
357         radv_fill_device_extension_table(device, &device->supported_extensions);
358
359         result = radv_init_wsi(device);
360         if (result != VK_SUCCESS) {
361                 device->ws->destroy(device->ws);
362                 vk_error(instance, result);
363                 goto fail;
364         }
365
366         if ((device->instance->debug_flags & RADV_DEBUG_INFO))
367                 ac_print_gpu_info(&device->rad_info);
368
369         return VK_SUCCESS;
370
371 fail:
372         close(fd);
373         if (master_fd != -1)
374                 close(master_fd);
375         return result;
376 }
377
378 static void
379 radv_physical_device_finish(struct radv_physical_device *device)
380 {
381         radv_finish_wsi(device);
382         device->ws->destroy(device->ws);
383         disk_cache_destroy(device->disk_cache);
384         close(device->local_fd);
385         if (device->master_fd != -1)
386                 close(device->master_fd);
387 }
388
389 static void *
390 default_alloc_func(void *pUserData, size_t size, size_t align,
391                    VkSystemAllocationScope allocationScope)
392 {
393         return malloc(size);
394 }
395
396 static void *
397 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
398                      size_t align, VkSystemAllocationScope allocationScope)
399 {
400         return realloc(pOriginal, size);
401 }
402
/* Default free callback: a plain free(pMemory); the user data is ignored. */
static void
default_free_func(void *pUserData, void *pMemory)
{
        (void)pUserData;

        free(pMemory);
}
408
409 static const VkAllocationCallbacks default_alloc = {
410         .pUserData = NULL,
411         .pfnAllocation = default_alloc_func,
412         .pfnReallocation = default_realloc_func,
413         .pfnFree = default_free_func,
414 };
415
416 static const struct debug_control radv_debug_options[] = {
417         {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
418         {"nodcc", RADV_DEBUG_NO_DCC},
419         {"shaders", RADV_DEBUG_DUMP_SHADERS},
420         {"nocache", RADV_DEBUG_NO_CACHE},
421         {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
422         {"nohiz", RADV_DEBUG_NO_HIZ},
423         {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
424         {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
425         {"allbos", RADV_DEBUG_ALL_BOS},
426         {"noibs", RADV_DEBUG_NO_IBS},
427         {"spirv", RADV_DEBUG_DUMP_SPIRV},
428         {"vmfaults", RADV_DEBUG_VM_FAULTS},
429         {"zerovram", RADV_DEBUG_ZERO_VRAM},
430         {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
431         {"nosisched", RADV_DEBUG_NO_SISCHED},
432         {"preoptir", RADV_DEBUG_PREOPTIR},
433         {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
434         {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
435         {"info", RADV_DEBUG_INFO},
436         {"errors", RADV_DEBUG_ERRORS},
437         {"startup", RADV_DEBUG_STARTUP},
438         {"checkir", RADV_DEBUG_CHECKIR},
439         {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
440         {NULL, 0}
441 };
442
443 const char *
444 radv_get_debug_option_name(int id)
445 {
446         assert(id < ARRAY_SIZE(radv_debug_options) - 1);
447         return radv_debug_options[id].string;
448 }
449
450 static const struct debug_control radv_perftest_options[] = {
451         {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
452         {"sisched", RADV_PERFTEST_SISCHED},
453         {"localbos", RADV_PERFTEST_LOCAL_BOS},
454         {"binning", RADV_PERFTEST_BINNING},
455         {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
456         {NULL, 0}
457 };
458
459 const char *
460 radv_get_perftest_option_name(int id)
461 {
462         assert(id < ARRAY_SIZE(radv_debug_options) - 1);
463         return radv_perftest_options[id].string;
464 }
465
466 static void
467 radv_handle_per_app_options(struct radv_instance *instance,
468                             const VkApplicationInfo *info)
469 {
470         const char *name = info ? info->pApplicationName : NULL;
471
472         if (!name)
473                 return;
474
475         if (!strcmp(name, "Talos - Linux - 32bit") ||
476             !strcmp(name, "Talos - Linux - 64bit")) {
477                 if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) {
478                         /* Force enable LLVM sisched for Talos because it looks
479                          * safe and it gives few more FPS.
480                          */
481                         instance->perftest_flags |= RADV_PERFTEST_SISCHED;
482                 }
483         } else if (!strcmp(name, "DOOM_VFR")) {
484                 /* Work around a Doom VFR game bug */
485                 instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
486         }
487 }
488
489 static int radv_get_instance_extension_index(const char *name)
490 {
491         for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
492                 if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
493                         return i;
494         }
495         return -1;
496 }
497
498
499 VkResult radv_CreateInstance(
500         const VkInstanceCreateInfo*                 pCreateInfo,
501         const VkAllocationCallbacks*                pAllocator,
502         VkInstance*                                 pInstance)
503 {
504         struct radv_instance *instance;
505         VkResult result;
506
507         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
508
509         uint32_t client_version;
510         if (pCreateInfo->pApplicationInfo &&
511             pCreateInfo->pApplicationInfo->apiVersion != 0) {
512                 client_version = pCreateInfo->pApplicationInfo->apiVersion;
513         } else {
514                 radv_EnumerateInstanceVersion(&client_version);
515         }
516
517         instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
518                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
519         if (!instance)
520                 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
521
522         instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
523
524         if (pAllocator)
525                 instance->alloc = *pAllocator;
526         else
527                 instance->alloc = default_alloc;
528
529         instance->apiVersion = client_version;
530         instance->physicalDeviceCount = -1;
531
532         instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
533                                                    radv_debug_options);
534
535         instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
536                                                    radv_perftest_options);
537
538
539         if (instance->debug_flags & RADV_DEBUG_STARTUP)
540                 radv_logi("Created an instance");
541
542         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
543                 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
544                 int index = radv_get_instance_extension_index(ext_name);
545
546                 if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
547                         vk_free2(&default_alloc, pAllocator, instance);
548                         return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
549                 }
550
551                 instance->enabled_extensions.extensions[index] = true;
552         }
553
554         result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
555         if (result != VK_SUCCESS) {
556                 vk_free2(&default_alloc, pAllocator, instance);
557                 return vk_error(instance, result);
558         }
559
560         _mesa_locale_init();
561
562         VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
563
564         radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
565
566         *pInstance = radv_instance_to_handle(instance);
567
568         return VK_SUCCESS;
569 }
570
571 void radv_DestroyInstance(
572         VkInstance                                  _instance,
573         const VkAllocationCallbacks*                pAllocator)
574 {
575         RADV_FROM_HANDLE(radv_instance, instance, _instance);
576
577         if (!instance)
578                 return;
579
580         for (int i = 0; i < instance->physicalDeviceCount; ++i) {
581                 radv_physical_device_finish(instance->physicalDevices + i);
582         }
583
584         VG(VALGRIND_DESTROY_MEMPOOL(instance));
585
586         _mesa_locale_fini();
587
588         vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
589
590         vk_free(&instance->alloc, instance);
591 }
592
593 static VkResult
594 radv_enumerate_devices(struct radv_instance *instance)
595 {
596         /* TODO: Check for more devices ? */
597         drmDevicePtr devices[8];
598         VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
599         int max_devices;
600
601         instance->physicalDeviceCount = 0;
602
603         max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
604
605         if (instance->debug_flags & RADV_DEBUG_STARTUP)
606                 radv_logi("Found %d drm nodes", max_devices);
607
608         if (max_devices < 1)
609                 return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
610
611         for (unsigned i = 0; i < (unsigned)max_devices; i++) {
612                 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
613                     devices[i]->bustype == DRM_BUS_PCI &&
614                     devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
615
616                         result = radv_physical_device_init(instance->physicalDevices +
617                                                            instance->physicalDeviceCount,
618                                                            instance,
619                                                            devices[i]);
620                         if (result == VK_SUCCESS)
621                                 ++instance->physicalDeviceCount;
622                         else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
623                                 break;
624                 }
625         }
626         drmFreeDevices(devices, max_devices);
627
628         return result;
629 }
630
631 VkResult radv_EnumeratePhysicalDevices(
632         VkInstance                                  _instance,
633         uint32_t*                                   pPhysicalDeviceCount,
634         VkPhysicalDevice*                           pPhysicalDevices)
635 {
636         RADV_FROM_HANDLE(radv_instance, instance, _instance);
637         VkResult result;
638
639         if (instance->physicalDeviceCount < 0) {
640                 result = radv_enumerate_devices(instance);
641                 if (result != VK_SUCCESS &&
642                     result != VK_ERROR_INCOMPATIBLE_DRIVER)
643                         return result;
644         }
645
646         if (!pPhysicalDevices) {
647                 *pPhysicalDeviceCount = instance->physicalDeviceCount;
648         } else {
649                 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
650                 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
651                         pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
652         }
653
654         return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
655                                                                      : VK_SUCCESS;
656 }
657
658 VkResult radv_EnumeratePhysicalDeviceGroups(
659     VkInstance                                  _instance,
660     uint32_t*                                   pPhysicalDeviceGroupCount,
661     VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
662 {
663         RADV_FROM_HANDLE(radv_instance, instance, _instance);
664         VkResult result;
665
666         if (instance->physicalDeviceCount < 0) {
667                 result = radv_enumerate_devices(instance);
668                 if (result != VK_SUCCESS &&
669                     result != VK_ERROR_INCOMPATIBLE_DRIVER)
670                         return result;
671         }
672
673         if (!pPhysicalDeviceGroupProperties) {
674                 *pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
675         } else {
676                 *pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
677                 for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
678                         pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
679                         pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
680                         pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
681                 }
682         }
683         return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
684                                                                           : VK_SUCCESS;
685 }
686
687 void radv_GetPhysicalDeviceFeatures(
688         VkPhysicalDevice                            physicalDevice,
689         VkPhysicalDeviceFeatures*                   pFeatures)
690 {
691         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
692         memset(pFeatures, 0, sizeof(*pFeatures));
693
694         *pFeatures = (VkPhysicalDeviceFeatures) {
695                 .robustBufferAccess                       = true,
696                 .fullDrawIndexUint32                      = true,
697                 .imageCubeArray                           = true,
698                 .independentBlend                         = true,
699                 .geometryShader                           = true,
700                 .tessellationShader                       = true,
701                 .sampleRateShading                        = true,
702                 .dualSrcBlend                             = true,
703                 .logicOp                                  = true,
704                 .multiDrawIndirect                        = true,
705                 .drawIndirectFirstInstance                = true,
706                 .depthClamp                               = true,
707                 .depthBiasClamp                           = true,
708                 .fillModeNonSolid                         = true,
709                 .depthBounds                              = true,
710                 .wideLines                                = true,
711                 .largePoints                              = true,
712                 .alphaToOne                               = true,
713                 .multiViewport                            = true,
714                 .samplerAnisotropy                        = true,
715                 .textureCompressionETC2                   = pdevice->rad_info.chip_class >= GFX9 ||
716                                                             pdevice->rad_info.family == CHIP_STONEY,
717                 .textureCompressionASTC_LDR               = false,
718                 .textureCompressionBC                     = true,
719                 .occlusionQueryPrecise                    = true,
720                 .pipelineStatisticsQuery                  = true,
721                 .vertexPipelineStoresAndAtomics           = true,
722                 .fragmentStoresAndAtomics                 = true,
723                 .shaderTessellationAndGeometryPointSize   = true,
724                 .shaderImageGatherExtended                = true,
725                 .shaderStorageImageExtendedFormats        = true,
726                 .shaderStorageImageMultisample            = false,
727                 .shaderUniformBufferArrayDynamicIndexing  = true,
728                 .shaderSampledImageArrayDynamicIndexing   = true,
729                 .shaderStorageBufferArrayDynamicIndexing  = true,
730                 .shaderStorageImageArrayDynamicIndexing   = true,
731                 .shaderStorageImageReadWithoutFormat      = true,
732                 .shaderStorageImageWriteWithoutFormat     = true,
733                 .shaderClipDistance                       = true,
734                 .shaderCullDistance                       = true,
735                 .shaderFloat64                            = true,
736                 .shaderInt64                              = true,
737                 .shaderInt16                              = false,
738                 .sparseBinding                            = true,
739                 .variableMultisampleRate                  = true,
740                 .inheritedQueries                         = true,
741         };
742 }
743
744 void radv_GetPhysicalDeviceFeatures2(
745         VkPhysicalDevice                            physicalDevice,
746         VkPhysicalDeviceFeatures2KHR               *pFeatures)
747 {
748         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
749         vk_foreach_struct(ext, pFeatures->pNext) {
750                 switch (ext->sType) {
751                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
752                         VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
753                         features->variablePointersStorageBuffer = true;
754                         features->variablePointers = false;
755                         break;
756                 }
757                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: {
758                         VkPhysicalDeviceMultiviewFeaturesKHR *features = (VkPhysicalDeviceMultiviewFeaturesKHR*)ext;
759                         features->multiview = true;
760                         features->multiviewGeometryShader = true;
761                         features->multiviewTessellationShader = true;
762                         break;
763                 }
764                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: {
765                         VkPhysicalDeviceShaderDrawParameterFeatures *features =
766                             (VkPhysicalDeviceShaderDrawParameterFeatures*)ext;
767                         features->shaderDrawParameters = true;
768                         break;
769                 }
770                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
771                         VkPhysicalDeviceProtectedMemoryFeatures *features =
772                             (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
773                         features->protectedMemory = false;
774                         break;
775                 }
776                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
777                         VkPhysicalDevice16BitStorageFeatures *features =
778                             (VkPhysicalDevice16BitStorageFeatures*)ext;
779                         bool enabled = HAVE_LLVM >= 0x0700 && pdevice->rad_info.chip_class >= VI;
780                         features->storageBuffer16BitAccess = enabled;
781                         features->uniformAndStorageBuffer16BitAccess = enabled;
782                         features->storagePushConstant16 = enabled;
783                         features->storageInputOutput16 = enabled;
784                         break;
785                 }
786                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
787                         VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
788                             (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
789                         features->samplerYcbcrConversion = false;
790                         break;
791                 }
792                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
793                         VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
794                                 (VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)ext;
795                         features->shaderInputAttachmentArrayDynamicIndexing = true;
796                         features->shaderUniformTexelBufferArrayDynamicIndexing = true;
797                         features->shaderStorageTexelBufferArrayDynamicIndexing = true;
798                         features->shaderUniformBufferArrayNonUniformIndexing = false;
799                         features->shaderSampledImageArrayNonUniformIndexing = false;
800                         features->shaderStorageBufferArrayNonUniformIndexing = false;
801                         features->shaderStorageImageArrayNonUniformIndexing = false;
802                         features->shaderInputAttachmentArrayNonUniformIndexing = false;
803                         features->shaderUniformTexelBufferArrayNonUniformIndexing = false;
804                         features->shaderStorageTexelBufferArrayNonUniformIndexing = false;
805                         features->descriptorBindingUniformBufferUpdateAfterBind = true;
806                         features->descriptorBindingSampledImageUpdateAfterBind = true;
807                         features->descriptorBindingStorageImageUpdateAfterBind = true;
808                         features->descriptorBindingStorageBufferUpdateAfterBind = true;
809                         features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
810                         features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
811                         features->descriptorBindingUpdateUnusedWhilePending = true;
812                         features->descriptorBindingPartiallyBound = true;
813                         features->descriptorBindingVariableDescriptorCount = true;
814                         features->runtimeDescriptorArray = true;
815                         break;
816                 }
817                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
818                         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
819                                 (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
820                         features->conditionalRendering = true;
821                         features->inheritedConditionalRendering = false;
822                         break;
823                 }
824                 default:
825                         break;
826                 }
827         }
828         return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
829 }
830
831 void radv_GetPhysicalDeviceProperties(
832         VkPhysicalDevice                            physicalDevice,
833         VkPhysicalDeviceProperties*                 pProperties)
834 {
835         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
836         VkSampleCountFlags sample_counts = 0xf;
837
838         /* make sure that the entire descriptor set is addressable with a signed
839          * 32-bit int. So the sum of all limits scaled by descriptor size has to
840          * be at most 2 GiB. the combined image & samples object count as one of
841          * both. This limit is for the pipeline layout, not for the set layout, but
842          * there is no set limit, so we just set a pipeline limit. I don't think
843          * any app is going to hit this soon. */
844         size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
845                   (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
846                    32 /* storage buffer, 32 due to potential space wasted on alignment */ +
847                    32 /* sampler, largest when combined with image */ +
848                    64 /* sampled image */ +
849                    64 /* storage image */);
850
851         VkPhysicalDeviceLimits limits = {
852                 .maxImageDimension1D                      = (1 << 14),
853                 .maxImageDimension2D                      = (1 << 14),
854                 .maxImageDimension3D                      = (1 << 11),
855                 .maxImageDimensionCube                    = (1 << 14),
856                 .maxImageArrayLayers                      = (1 << 11),
857                 .maxTexelBufferElements                   = 128 * 1024 * 1024,
858                 .maxUniformBufferRange                    = UINT32_MAX,
859                 .maxStorageBufferRange                    = UINT32_MAX,
860                 .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
861                 .maxMemoryAllocationCount                 = UINT32_MAX,
862                 .maxSamplerAllocationCount                = 64 * 1024,
863                 .bufferImageGranularity                   = 64, /* A cache line */
864                 .sparseAddressSpaceSize                   = 0xffffffffu, /* buffer max size */
865                 .maxBoundDescriptorSets                   = MAX_SETS,
866                 .maxPerStageDescriptorSamplers            = max_descriptor_set_size,
867                 .maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
868                 .maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
869                 .maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
870                 .maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
871                 .maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
872                 .maxPerStageResources                     = max_descriptor_set_size,
873                 .maxDescriptorSetSamplers                 = max_descriptor_set_size,
874                 .maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
875                 .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_UNIFORM_BUFFERS,
876                 .maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
877                 .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_STORAGE_BUFFERS,
878                 .maxDescriptorSetSampledImages            = max_descriptor_set_size,
879                 .maxDescriptorSetStorageImages            = max_descriptor_set_size,
880                 .maxDescriptorSetInputAttachments         = max_descriptor_set_size,
881                 .maxVertexInputAttributes                 = 32,
882                 .maxVertexInputBindings                   = 32,
883                 .maxVertexInputAttributeOffset            = 2047,
884                 .maxVertexInputBindingStride              = 2048,
885                 .maxVertexOutputComponents                = 128,
886                 .maxTessellationGenerationLevel           = 64,
887                 .maxTessellationPatchSize                 = 32,
888                 .maxTessellationControlPerVertexInputComponents = 128,
889                 .maxTessellationControlPerVertexOutputComponents = 128,
890                 .maxTessellationControlPerPatchOutputComponents = 120,
891                 .maxTessellationControlTotalOutputComponents = 4096,
892                 .maxTessellationEvaluationInputComponents = 128,
893                 .maxTessellationEvaluationOutputComponents = 128,
894                 .maxGeometryShaderInvocations             = 127,
895                 .maxGeometryInputComponents               = 64,
896                 .maxGeometryOutputComponents              = 128,
897                 .maxGeometryOutputVertices                = 256,
898                 .maxGeometryTotalOutputComponents         = 1024,
899                 .maxFragmentInputComponents               = 128,
900                 .maxFragmentOutputAttachments             = 8,
901                 .maxFragmentDualSrcAttachments            = 1,
902                 .maxFragmentCombinedOutputResources       = 8,
903                 .maxComputeSharedMemorySize               = 32768,
904                 .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
905                 .maxComputeWorkGroupInvocations           = 2048,
906                 .maxComputeWorkGroupSize = {
907                         2048,
908                         2048,
909                         2048
910                 },
911                 .subPixelPrecisionBits                    = 4 /* FIXME */,
912                 .subTexelPrecisionBits                    = 4 /* FIXME */,
913                 .mipmapPrecisionBits                      = 4 /* FIXME */,
914                 .maxDrawIndexedIndexValue                 = UINT32_MAX,
915                 .maxDrawIndirectCount                     = UINT32_MAX,
916                 .maxSamplerLodBias                        = 16,
917                 .maxSamplerAnisotropy                     = 16,
918                 .maxViewports                             = MAX_VIEWPORTS,
919                 .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
920                 .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
921                 .viewportSubPixelBits                     = 8,
922                 .minMemoryMapAlignment                    = 4096, /* A page */
923                 .minTexelBufferOffsetAlignment            = 1,
924                 .minUniformBufferOffsetAlignment          = 4,
925                 .minStorageBufferOffsetAlignment          = 4,
926                 .minTexelOffset                           = -32,
927                 .maxTexelOffset                           = 31,
928                 .minTexelGatherOffset                     = -32,
929                 .maxTexelGatherOffset                     = 31,
930                 .minInterpolationOffset                   = -2,
931                 .maxInterpolationOffset                   = 2,
932                 .subPixelInterpolationOffsetBits          = 8,
933                 .maxFramebufferWidth                      = (1 << 14),
934                 .maxFramebufferHeight                     = (1 << 14),
935                 .maxFramebufferLayers                     = (1 << 10),
936                 .framebufferColorSampleCounts             = sample_counts,
937                 .framebufferDepthSampleCounts             = sample_counts,
938                 .framebufferStencilSampleCounts           = sample_counts,
939                 .framebufferNoAttachmentsSampleCounts     = sample_counts,
940                 .maxColorAttachments                      = MAX_RTS,
941                 .sampledImageColorSampleCounts            = sample_counts,
942                 .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
943                 .sampledImageDepthSampleCounts            = sample_counts,
944                 .sampledImageStencilSampleCounts          = sample_counts,
945                 .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
946                 .maxSampleMaskWords                       = 1,
947                 .timestampComputeAndGraphics              = true,
948                 .timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
949                 .maxClipDistances                         = 8,
950                 .maxCullDistances                         = 8,
951                 .maxCombinedClipAndCullDistances          = 8,
952                 .discreteQueuePriorities                  = 1,
953                 .pointSizeRange                           = { 0.125, 255.875 },
954                 .lineWidthRange                           = { 0.0, 7.9921875 },
955                 .pointSizeGranularity                     = (1.0 / 8.0),
956                 .lineWidthGranularity                     = (1.0 / 128.0),
957                 .strictLines                              = false, /* FINISHME */
958                 .standardSampleLocations                  = true,
959                 .optimalBufferCopyOffsetAlignment         = 128,
960                 .optimalBufferCopyRowPitchAlignment       = 128,
961                 .nonCoherentAtomSize                      = 64,
962         };
963
964         *pProperties = (VkPhysicalDeviceProperties) {
965                 .apiVersion = radv_physical_device_api_version(pdevice),
966                 .driverVersion = vk_get_driver_version(),
967                 .vendorID = ATI_VENDOR_ID,
968                 .deviceID = pdevice->rad_info.pci_id,
969                 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
970                 .limits = limits,
971                 .sparseProperties = {0},
972         };
973
974         strcpy(pProperties->deviceName, pdevice->name);
975         memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
976 }
977
978 void radv_GetPhysicalDeviceProperties2(
979         VkPhysicalDevice                            physicalDevice,
980         VkPhysicalDeviceProperties2KHR             *pProperties)
981 {
982         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
983         radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
984
985         vk_foreach_struct(ext, pProperties->pNext) {
986                 switch (ext->sType) {
987                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
988                         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
989                                 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
990                         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
991                         break;
992                 }
993                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
994                         VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
995                         memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
996                         memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
997                         properties->deviceLUIDValid = false;
998                         break;
999                 }
1000                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR: {
1001                         VkPhysicalDeviceMultiviewPropertiesKHR *properties = (VkPhysicalDeviceMultiviewPropertiesKHR*)ext;
1002                         properties->maxMultiviewViewCount = MAX_VIEWS;
1003                         properties->maxMultiviewInstanceIndex = INT_MAX;
1004                         break;
1005                 }
1006                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
1007                         VkPhysicalDevicePointClippingPropertiesKHR *properties =
1008                             (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
1009                         properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
1010                         break;
1011                 }
1012                 case  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
1013                         VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
1014                             (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
1015                         properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
1016                         break;
1017                 }
1018                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
1019                         VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
1020                             (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
1021                         properties->minImportedHostPointerAlignment = 4096;
1022                         break;
1023                 }
1024                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
1025                         VkPhysicalDeviceSubgroupProperties *properties =
1026                             (VkPhysicalDeviceSubgroupProperties*)ext;
1027                         properties->subgroupSize = 64;
1028                         properties->supportedStages = VK_SHADER_STAGE_ALL;
1029                         properties->supportedOperations =
1030                                                         VK_SUBGROUP_FEATURE_BASIC_BIT |
1031                                                         VK_SUBGROUP_FEATURE_BALLOT_BIT |
1032                                                         VK_SUBGROUP_FEATURE_QUAD_BIT |
1033                                                         VK_SUBGROUP_FEATURE_VOTE_BIT;
1034                         if (pdevice->rad_info.chip_class >= VI) {
1035                                 properties->supportedOperations |=
1036                                                         VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
1037                                                         VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
1038                         }
1039                         properties->quadOperationsInAllStages = true;
1040                         break;
1041                 }
1042                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
1043                         VkPhysicalDeviceMaintenance3Properties *properties =
1044                             (VkPhysicalDeviceMaintenance3Properties*)ext;
1045                         /* Make sure everything is addressable by a signed 32-bit int, and
1046                          * our largest descriptors are 96 bytes. */
1047                         properties->maxPerSetDescriptors = (1ull << 31) / 96;
1048                         /* Our buffer size fields allow only this much */
1049                         properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
1050                         break;
1051                 }
1052                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
1053                         VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
1054                                 (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
1055                         /* GFX6-8 only support single channel min/max filter. */
1056                         properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
1057                         properties->filterMinmaxSingleComponentFormats = true;
1058                         break;
1059                 }
1060                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
1061                         VkPhysicalDeviceShaderCorePropertiesAMD *properties =
1062                                 (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
1063
1064                         /* Shader engines. */
1065                         properties->shaderEngineCount =
1066                                 pdevice->rad_info.max_se;
1067                         properties->shaderArraysPerEngineCount =
1068                                 pdevice->rad_info.max_sh_per_se;
1069                         properties->computeUnitsPerShaderArray =
1070                                 pdevice->rad_info.num_good_compute_units /
1071                                         (pdevice->rad_info.max_se *
1072                                          pdevice->rad_info.max_sh_per_se);
1073                         properties->simdPerComputeUnit = 4;
1074                         properties->wavefrontsPerSimd =
1075                                 pdevice->rad_info.family == CHIP_TONGA ||
1076                                 pdevice->rad_info.family == CHIP_ICELAND ||
1077                                 pdevice->rad_info.family == CHIP_POLARIS10 ||
1078                                 pdevice->rad_info.family == CHIP_POLARIS11 ||
1079                                 pdevice->rad_info.family == CHIP_POLARIS12 ||
1080                                 pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
1081                         properties->wavefrontSize = 64;
1082
1083                         /* SGPR. */
1084                         properties->sgprsPerSimd =
1085                                 radv_get_num_physical_sgprs(pdevice);
1086                         properties->minSgprAllocation =
1087                                 pdevice->rad_info.chip_class >= VI ? 16 : 8;
1088                         properties->maxSgprAllocation =
1089                                 pdevice->rad_info.family == CHIP_TONGA ||
1090                                 pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
1091                         properties->sgprAllocationGranularity =
1092                                 pdevice->rad_info.chip_class >= VI ? 16 : 8;
1093
1094                         /* VGPR. */
1095                         properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
1096                         properties->minVgprAllocation = 4;
1097                         properties->maxVgprAllocation = 256;
1098                         properties->vgprAllocationGranularity = 4;
1099                         break;
1100                 }
1101                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
1102                         VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
1103                                 (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
1104                         properties->maxVertexAttribDivisor = UINT32_MAX;
1105                         break;
1106                 }
1107                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
1108                         VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
1109                                 (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext;
1110                         properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1111                         properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
1112                         properties->shaderSampledImageArrayNonUniformIndexingNative = false;
1113                         properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
1114                         properties->shaderStorageImageArrayNonUniformIndexingNative = false;
1115                         properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1116                         properties->robustBufferAccessUpdateAfterBind = false;
1117                         properties->quadDivergentImplicitLod = false;
1118
1119                         size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
1120                                   (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1121                                    32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1122                                    32 /* sampler, largest when combined with image */ +
1123                                    64 /* sampled image */ +
1124                                    64 /* storage image */);
1125                         properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1126                         properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1127                         properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1128                         properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1129                         properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1130                         properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1131                         properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1132                         properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1133                         properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1134                         properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1135                         properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1136                         properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1137                         properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1138                         properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1139                         properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1140                         break;
1141                 }
1142                 default:
1143                         break;
1144                 }
1145         }
1146 }
1147
1148 static void radv_get_physical_device_queue_family_properties(
1149         struct radv_physical_device*                pdevice,
1150         uint32_t*                                   pCount,
1151         VkQueueFamilyProperties**                    pQueueFamilyProperties)
1152 {
1153         int num_queue_families = 1;
1154         int idx;
1155         if (pdevice->rad_info.num_compute_rings > 0 &&
1156             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
1157                 num_queue_families++;
1158
1159         if (pQueueFamilyProperties == NULL) {
1160                 *pCount = num_queue_families;
1161                 return;
1162         }
1163
1164         if (!*pCount)
1165                 return;
1166
1167         idx = 0;
1168         if (*pCount >= 1) {
1169                 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1170                         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1171                                       VK_QUEUE_COMPUTE_BIT |
1172                                       VK_QUEUE_TRANSFER_BIT |
1173                                       VK_QUEUE_SPARSE_BINDING_BIT,
1174                         .queueCount = 1,
1175                         .timestampValidBits = 64,
1176                         .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1177                 };
1178                 idx++;
1179         }
1180
1181         if (pdevice->rad_info.num_compute_rings > 0 &&
1182             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
1183                 if (*pCount > idx) {
1184                         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1185                                 .queueFlags = VK_QUEUE_COMPUTE_BIT |
1186                                               VK_QUEUE_TRANSFER_BIT |
1187                                               VK_QUEUE_SPARSE_BINDING_BIT,
1188                                 .queueCount = pdevice->rad_info.num_compute_rings,
1189                                 .timestampValidBits = 64,
1190                                 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1191                         };
1192                         idx++;
1193                 }
1194         }
1195         *pCount = idx;
1196 }
1197
1198 void radv_GetPhysicalDeviceQueueFamilyProperties(
1199         VkPhysicalDevice                            physicalDevice,
1200         uint32_t*                                   pCount,
1201         VkQueueFamilyProperties*                    pQueueFamilyProperties)
1202 {
1203         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1204         if (!pQueueFamilyProperties) {
1205                 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1206                 return;
1207         }
1208         VkQueueFamilyProperties *properties[] = {
1209                 pQueueFamilyProperties + 0,
1210                 pQueueFamilyProperties + 1,
1211                 pQueueFamilyProperties + 2,
1212         };
1213         radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1214         assert(*pCount <= 3);
1215 }
1216
1217 void radv_GetPhysicalDeviceQueueFamilyProperties2(
1218         VkPhysicalDevice                            physicalDevice,
1219         uint32_t*                                   pCount,
1220         VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
1221 {
1222         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1223         if (!pQueueFamilyProperties) {
1224                 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1225                 return;
1226         }
1227         VkQueueFamilyProperties *properties[] = {
1228                 &pQueueFamilyProperties[0].queueFamilyProperties,
1229                 &pQueueFamilyProperties[1].queueFamilyProperties,
1230                 &pQueueFamilyProperties[2].queueFamilyProperties,
1231         };
1232         radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1233         assert(*pCount <= 3);
1234 }
1235
1236 void radv_GetPhysicalDeviceMemoryProperties(
1237         VkPhysicalDevice                            physicalDevice,
1238         VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
1239 {
1240         RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1241
1242         *pMemoryProperties = physical_device->memory_properties;
1243 }
1244
1245 void radv_GetPhysicalDeviceMemoryProperties2(
1246         VkPhysicalDevice                            physicalDevice,
1247         VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
1248 {
1249         return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1250                                                       &pMemoryProperties->memoryProperties);
1251 }
1252
1253 VkResult radv_GetMemoryHostPointerPropertiesEXT(
1254         VkDevice                                    _device,
1255         VkExternalMemoryHandleTypeFlagBitsKHR       handleType,
1256         const void                                 *pHostPointer,
1257         VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
1258 {
1259         RADV_FROM_HANDLE(radv_device, device, _device);
1260
1261         switch (handleType)
1262         {
1263         case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
1264                 const struct radv_physical_device *physical_device = device->physical_device;
1265                 uint32_t memoryTypeBits = 0;
1266                 for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
1267                         if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
1268                                 memoryTypeBits = (1 << i);
1269                                 break;
1270                         }
1271                 }
1272                 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
1273                 return VK_SUCCESS;
1274         }
1275         default:
1276                 return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
1277         }
1278 }
1279
1280 static enum radeon_ctx_priority
1281 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
1282 {
1283         /* Default to MEDIUM when a specific global priority isn't requested */
1284         if (!pObj)
1285                 return RADEON_CTX_PRIORITY_MEDIUM;
1286
1287         switch(pObj->globalPriority) {
1288         case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
1289                 return RADEON_CTX_PRIORITY_REALTIME;
1290         case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
1291                 return RADEON_CTX_PRIORITY_HIGH;
1292         case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
1293                 return RADEON_CTX_PRIORITY_MEDIUM;
1294         case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
1295                 return RADEON_CTX_PRIORITY_LOW;
1296         default:
1297                 unreachable("Illegal global priority value");
1298                 return RADEON_CTX_PRIORITY_INVALID;
1299         }
1300 }
1301
1302 static int
1303 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1304                 uint32_t queue_family_index, int idx,
1305                 VkDeviceQueueCreateFlags flags,
1306                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
1307 {
1308         queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1309         queue->device = device;
1310         queue->queue_family_index = queue_family_index;
1311         queue->queue_idx = idx;
1312         queue->priority = radv_get_queue_global_priority(global_priority);
1313         queue->flags = flags;
1314
1315         queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
1316         if (!queue->hw_ctx)
1317                 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1318
1319         return VK_SUCCESS;
1320 }
1321
1322 static void
1323 radv_queue_finish(struct radv_queue *queue)
1324 {
1325         if (queue->hw_ctx)
1326                 queue->device->ws->ctx_destroy(queue->hw_ctx);
1327
1328         if (queue->initial_full_flush_preamble_cs)
1329                 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1330         if (queue->initial_preamble_cs)
1331                 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1332         if (queue->continue_preamble_cs)
1333                 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1334         if (queue->descriptor_bo)
1335                 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1336         if (queue->scratch_bo)
1337                 queue->device->ws->buffer_destroy(queue->scratch_bo);
1338         if (queue->esgs_ring_bo)
1339                 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1340         if (queue->gsvs_ring_bo)
1341                 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1342         if (queue->tess_rings_bo)
1343                 queue->device->ws->buffer_destroy(queue->tess_rings_bo);
1344         if (queue->compute_scratch_bo)
1345                 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1346 }
1347
1348 static void
1349 radv_bo_list_init(struct radv_bo_list *bo_list)
1350 {
1351         pthread_mutex_init(&bo_list->mutex, NULL);
1352         bo_list->list.count = bo_list->capacity = 0;
1353         bo_list->list.bos = NULL;
1354 }
1355
1356 static void
1357 radv_bo_list_finish(struct radv_bo_list *bo_list)
1358 {
1359         free(bo_list->list.bos);
1360         pthread_mutex_destroy(&bo_list->mutex);
1361 }
1362
1363 static VkResult radv_bo_list_add(struct radv_device *device,
1364                                  struct radeon_winsys_bo *bo)
1365 {
1366         struct radv_bo_list *bo_list = &device->bo_list;
1367
1368         if (unlikely(!device->use_global_bo_list))
1369                 return VK_SUCCESS;
1370
1371         pthread_mutex_lock(&bo_list->mutex);
1372         if (bo_list->list.count == bo_list->capacity) {
1373                 unsigned capacity = MAX2(4, bo_list->capacity * 2);
1374                 void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));
1375
1376                 if (!data) {
1377                         pthread_mutex_unlock(&bo_list->mutex);
1378                         return VK_ERROR_OUT_OF_HOST_MEMORY;
1379                 }
1380
1381                 bo_list->list.bos = (struct radeon_winsys_bo**)data;
1382                 bo_list->capacity = capacity;
1383         }
1384
1385         bo_list->list.bos[bo_list->list.count++] = bo;
1386         pthread_mutex_unlock(&bo_list->mutex);
1387         return VK_SUCCESS;
1388 }
1389
1390 static void radv_bo_list_remove(struct radv_device *device,
1391                                 struct radeon_winsys_bo *bo)
1392 {
1393         struct radv_bo_list *bo_list = &device->bo_list;
1394
1395         if (unlikely(!device->use_global_bo_list))
1396                 return;
1397
1398         pthread_mutex_lock(&bo_list->mutex);
1399         for(unsigned i = 0; i < bo_list->list.count; ++i) {
1400                 if (bo_list->list.bos[i] == bo) {
1401                         bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
1402                         --bo_list->list.count;
1403                         break;
1404                 }
1405         }
1406         pthread_mutex_unlock(&bo_list->mutex);
1407 }
1408
1409 static void
1410 radv_device_init_gs_info(struct radv_device *device)
1411 {
1412         device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
1413                                                        device->physical_device->rad_info.family);
1414 }
1415
1416 static int radv_get_device_extension_index(const char *name)
1417 {
1418         for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
1419                 if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
1420                         return i;
1421         }
1422         return -1;
1423 }
1424
1425 VkResult radv_CreateDevice(
1426         VkPhysicalDevice                            physicalDevice,
1427         const VkDeviceCreateInfo*                   pCreateInfo,
1428         const VkAllocationCallbacks*                pAllocator,
1429         VkDevice*                                   pDevice)
1430 {
1431         RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1432         VkResult result;
1433         struct radv_device *device;
1434
1435         bool keep_shader_info = false;
1436
1437         /* Check enabled features */
1438         if (pCreateInfo->pEnabledFeatures) {
1439                 VkPhysicalDeviceFeatures supported_features;
1440                 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1441                 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1442                 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1443                 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1444                 for (uint32_t i = 0; i < num_features; i++) {
1445                         if (enabled_feature[i] && !supported_feature[i])
1446                                 return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
1447                 }
1448         }
1449
1450         device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1451                             sizeof(*device), 8,
1452                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1453         if (!device)
1454                 return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1455
1456         device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1457         device->instance = physical_device->instance;
1458         device->physical_device = physical_device;
1459
1460         device->ws = physical_device->ws;
1461         if (pAllocator)
1462                 device->alloc = *pAllocator;
1463         else
1464                 device->alloc = physical_device->instance->alloc;
1465
1466         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1467                 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1468                 int index = radv_get_device_extension_index(ext_name);
1469                 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1470                         vk_free(&device->alloc, device);
1471                         return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
1472                 }
1473
1474                 device->enabled_extensions.extensions[index] = true;
1475         }
1476
1477         keep_shader_info = device->enabled_extensions.AMD_shader_info;
1478
1479         /* With update after bind we can't attach bo's to the command buffer
1480          * from the descriptor set anymore, so we have to use a global BO list.
1481          */
1482         device->use_global_bo_list =
1483                 device->enabled_extensions.EXT_descriptor_indexing;
1484
1485         mtx_init(&device->shader_slab_mutex, mtx_plain);
1486         list_inithead(&device->shader_slabs);
1487
1488         radv_bo_list_init(&device->bo_list);
1489
1490         for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1491                 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1492                 uint32_t qfi = queue_create->queueFamilyIndex;
1493                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1494                         vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1495
1496                 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1497
1498                 device->queues[qfi] = vk_alloc(&device->alloc,
1499                                                queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1500                 if (!device->queues[qfi]) {
1501                         result = VK_ERROR_OUT_OF_HOST_MEMORY;
1502                         goto fail;
1503                 }
1504
1505                 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1506
1507                 device->queue_count[qfi] = queue_create->queueCount;
1508
1509                 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1510                         result = radv_queue_init(device, &device->queues[qfi][q],
1511                                                  qfi, q, queue_create->flags,
1512                                                  global_priority);
1513                         if (result != VK_SUCCESS)
1514                                 goto fail;
1515                 }
1516         }
1517
1518         device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1519                         ((device->instance->perftest_flags & RADV_PERFTEST_BINNING) ||
1520                          device->physical_device->rad_info.family == CHIP_RAVEN);
1521
1522         /* Disabled and not implemented for now. */
1523         device->dfsm_allowed = device->pbb_allowed &&
1524                                device->physical_device->rad_info.family == CHIP_RAVEN;
1525
1526 #ifdef ANDROID
1527         device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1528 #endif
1529
1530         /* The maximum number of scratch waves. Scratch space isn't divided
1531          * evenly between CUs. The number is only a function of the number of CUs.
1532          * We can decrease the constant to decrease the scratch buffer size.
1533          *
1534          * sctx->scratch_waves must be >= the maximum possible size of
1535          * 1 threadgroup, so that the hw doesn't hang from being unable
1536          * to start any.
1537          *
1538          * The recommended value is 4 per CU at most. Higher numbers don't
1539          * bring much benefit, but they still occupy chip resources (think
1540          * async compute). I've seen ~2% performance difference between 4 and 32.
1541          */
1542         uint32_t max_threads_per_block = 2048;
1543         device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1544                                      max_threads_per_block / 64);
1545
1546         device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
1547
1548         if (device->physical_device->rad_info.chip_class >= CIK) {
1549                 /* If the KMD allows it (there is a KMD hw register for it),
1550                  * allow launching waves out-of-order.
1551                  */
1552                 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
1553         }
1554
1555         radv_device_init_gs_info(device);
1556
1557         device->tess_offchip_block_dw_size =
1558                 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1559         device->has_distributed_tess =
1560                 device->physical_device->rad_info.chip_class >= VI &&
1561                 device->physical_device->rad_info.max_se >= 2;
1562
1563         if (getenv("RADV_TRACE_FILE")) {
1564                 const char *filename = getenv("RADV_TRACE_FILE");
1565
1566                 keep_shader_info = true;
1567
1568                 if (!radv_init_trace(device))
1569                         goto fail;
1570
1571                 fprintf(stderr, "*****************************************************************************\n");
1572                 fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
1573                 fprintf(stderr, "*****************************************************************************\n");
1574
1575                 fprintf(stderr, "Trace file will be dumped to %s\n", filename);
1576                 radv_dump_enabled_options(device, stderr);
1577         }
1578
1579         device->keep_shader_info = keep_shader_info;
1580
1581         result = radv_device_init_meta(device);
1582         if (result != VK_SUCCESS)
1583                 goto fail;
1584
1585         radv_device_init_msaa(device);
1586
1587         for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1588                 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1589                 switch (family) {
1590                 case RADV_QUEUE_GENERAL:
1591                         radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1592                         radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1593                         radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1594                         break;
1595                 case RADV_QUEUE_COMPUTE:
1596                         radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1597                         radeon_emit(device->empty_cs[family], 0);
1598                         break;
1599                 }
1600                 device->ws->cs_finalize(device->empty_cs[family]);
1601         }
1602
1603         if (device->physical_device->rad_info.chip_class >= CIK)
1604                 cik_create_gfx_config(device);
1605
1606         VkPipelineCacheCreateInfo ci;
1607         ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1608         ci.pNext = NULL;
1609         ci.flags = 0;
1610         ci.pInitialData = NULL;
1611         ci.initialDataSize = 0;
1612         VkPipelineCache pc;
1613         result = radv_CreatePipelineCache(radv_device_to_handle(device),
1614                                           &ci, NULL, &pc);
1615         if (result != VK_SUCCESS)
1616                 goto fail_meta;
1617
1618         device->mem_cache = radv_pipeline_cache_from_handle(pc);
1619
1620         *pDevice = radv_device_to_handle(device);
1621         return VK_SUCCESS;
1622
1623 fail_meta:
1624         radv_device_finish_meta(device);
1625 fail:
1626         radv_bo_list_finish(&device->bo_list);
1627
1628         if (device->trace_bo)
1629                 device->ws->buffer_destroy(device->trace_bo);
1630
1631         if (device->gfx_init)
1632                 device->ws->buffer_destroy(device->gfx_init);
1633
1634         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1635                 for (unsigned q = 0; q < device->queue_count[i]; q++)
1636                         radv_queue_finish(&device->queues[i][q]);
1637                 if (device->queue_count[i])
1638                         vk_free(&device->alloc, device->queues[i]);
1639         }
1640
1641         vk_free(&device->alloc, device);
1642         return result;
1643 }
1644
1645 void radv_DestroyDevice(
1646         VkDevice                                    _device,
1647         const VkAllocationCallbacks*                pAllocator)
1648 {
1649         RADV_FROM_HANDLE(radv_device, device, _device);
1650
1651         if (!device)
1652                 return;
1653
1654         if (device->trace_bo)
1655                 device->ws->buffer_destroy(device->trace_bo);
1656
1657         if (device->gfx_init)
1658                 device->ws->buffer_destroy(device->gfx_init);
1659
1660         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1661                 for (unsigned q = 0; q < device->queue_count[i]; q++)
1662                         radv_queue_finish(&device->queues[i][q]);
1663                 if (device->queue_count[i])
1664                         vk_free(&device->alloc, device->queues[i]);
1665                 if (device->empty_cs[i])
1666                         device->ws->cs_destroy(device->empty_cs[i]);
1667         }
1668         radv_device_finish_meta(device);
1669
1670         VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1671         radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1672
1673         radv_destroy_shader_slabs(device);
1674
1675         radv_bo_list_finish(&device->bo_list);
1676         vk_free(&device->alloc, device);
1677 }
1678
1679 VkResult radv_EnumerateInstanceLayerProperties(
1680         uint32_t*                                   pPropertyCount,
1681         VkLayerProperties*                          pProperties)
1682 {
1683         if (pProperties == NULL) {
1684                 *pPropertyCount = 0;
1685                 return VK_SUCCESS;
1686         }
1687
1688         /* None supported at this time */
1689         return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1690 }
1691
1692 VkResult radv_EnumerateDeviceLayerProperties(
1693         VkPhysicalDevice                            physicalDevice,
1694         uint32_t*                                   pPropertyCount,
1695         VkLayerProperties*                          pProperties)
1696 {
1697         if (pProperties == NULL) {
1698                 *pPropertyCount = 0;
1699                 return VK_SUCCESS;
1700         }
1701
1702         /* None supported at this time */
1703         return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1704 }
1705
1706 void radv_GetDeviceQueue2(
1707         VkDevice                                    _device,
1708         const VkDeviceQueueInfo2*                   pQueueInfo,
1709         VkQueue*                                    pQueue)
1710 {
1711         RADV_FROM_HANDLE(radv_device, device, _device);
1712         struct radv_queue *queue;
1713
1714         queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
1715         if (pQueueInfo->flags != queue->flags) {
1716                 /* From the Vulkan 1.1.70 spec:
1717                  *
1718                  * "The queue returned by vkGetDeviceQueue2 must have the same
1719                  * flags value from this structure as that used at device
1720                  * creation time in a VkDeviceQueueCreateInfo instance. If no
1721                  * matching flags were specified at device creation time then
1722                  * pQueue will return VK_NULL_HANDLE."
1723                  */
1724                 *pQueue = VK_NULL_HANDLE;
1725                 return;
1726         }
1727
1728         *pQueue = radv_queue_to_handle(queue);
1729 }
1730
1731 void radv_GetDeviceQueue(
1732         VkDevice                                    _device,
1733         uint32_t                                    queueFamilyIndex,
1734         uint32_t                                    queueIndex,
1735         VkQueue*                                    pQueue)
1736 {
1737         const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
1738                 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
1739                 .queueFamilyIndex = queueFamilyIndex,
1740                 .queueIndex = queueIndex
1741         };
1742
1743         radv_GetDeviceQueue2(_device, &info, pQueue);
1744 }
1745
1746 static void
1747 fill_geom_tess_rings(struct radv_queue *queue,
1748                      uint32_t *map,
1749                      bool add_sample_positions,
1750                      uint32_t esgs_ring_size,
1751                      struct radeon_winsys_bo *esgs_ring_bo,
1752                      uint32_t gsvs_ring_size,
1753                      struct radeon_winsys_bo *gsvs_ring_bo,
1754                      uint32_t tess_factor_ring_size,
1755                      uint32_t tess_offchip_ring_offset,
1756                      uint32_t tess_offchip_ring_size,
1757                      struct radeon_winsys_bo *tess_rings_bo)
1758 {
1759         uint64_t esgs_va = 0, gsvs_va = 0;
1760         uint64_t tess_va = 0, tess_offchip_va = 0;
1761         uint32_t *desc = &map[4];
1762
1763         if (esgs_ring_bo)
1764                 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1765         if (gsvs_ring_bo)
1766                 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1767         if (tess_rings_bo) {
1768                 tess_va = radv_buffer_get_va(tess_rings_bo);
1769                 tess_offchip_va = tess_va + tess_offchip_ring_offset;
1770         }
1771
1772         /* stride 0, num records - size, add tid, swizzle, elsize4,
1773            index stride 64 */
1774         desc[0] = esgs_va;
1775         desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1776                 S_008F04_STRIDE(0) |
1777                 S_008F04_SWIZZLE_ENABLE(true);
1778         desc[2] = esgs_ring_size;
1779         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1780                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1781                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1782                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1783                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1784                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1785                 S_008F0C_ELEMENT_SIZE(1) |
1786                 S_008F0C_INDEX_STRIDE(3) |
1787                 S_008F0C_ADD_TID_ENABLE(true);
1788
1789         desc += 4;
1790         /* GS entry for ES->GS ring */
1791         /* stride 0, num records - size, elsize0,
1792            index stride 0 */
1793         desc[0] = esgs_va;
1794         desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1795                 S_008F04_STRIDE(0) |
1796                 S_008F04_SWIZZLE_ENABLE(false);
1797         desc[2] = esgs_ring_size;
1798         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1799                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1800                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1801                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1802                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1803                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1804                 S_008F0C_ELEMENT_SIZE(0) |
1805                 S_008F0C_INDEX_STRIDE(0) |
1806                 S_008F0C_ADD_TID_ENABLE(false);
1807
1808         desc += 4;
1809         /* VS entry for GS->VS ring */
1810         /* stride 0, num records - size, elsize0,
1811            index stride 0 */
1812         desc[0] = gsvs_va;
1813         desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1814                 S_008F04_STRIDE(0) |
1815                 S_008F04_SWIZZLE_ENABLE(false);
1816         desc[2] = gsvs_ring_size;
1817         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1818                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1819                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1820                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1821                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1822                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1823                 S_008F0C_ELEMENT_SIZE(0) |
1824                 S_008F0C_INDEX_STRIDE(0) |
1825                 S_008F0C_ADD_TID_ENABLE(false);
1826         desc += 4;
1827
1828         /* stride gsvs_itemsize, num records 64
1829            elsize 4, index stride 16 */
1830         /* shader will patch stride and desc[2] */
1831         desc[0] = gsvs_va;
1832         desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1833                 S_008F04_STRIDE(0) |
1834                 S_008F04_SWIZZLE_ENABLE(true);
1835         desc[2] = 0;
1836         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1837                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1838                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1839                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1840                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1841                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1842                 S_008F0C_ELEMENT_SIZE(1) |
1843                 S_008F0C_INDEX_STRIDE(1) |
1844                 S_008F0C_ADD_TID_ENABLE(true);
1845         desc += 4;
1846
1847         desc[0] = tess_va;
1848         desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
1849                 S_008F04_STRIDE(0) |
1850                 S_008F04_SWIZZLE_ENABLE(false);
1851         desc[2] = tess_factor_ring_size;
1852         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1853                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1854                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1855                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1856                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1857                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1858                 S_008F0C_ELEMENT_SIZE(0) |
1859                 S_008F0C_INDEX_STRIDE(0) |
1860                 S_008F0C_ADD_TID_ENABLE(false);
1861         desc += 4;
1862
1863         desc[0] = tess_offchip_va;
1864         desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1865                 S_008F04_STRIDE(0) |
1866                 S_008F04_SWIZZLE_ENABLE(false);
1867         desc[2] = tess_offchip_ring_size;
1868         desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1869                 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1870                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1871                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1872                 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1873                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1874                 S_008F0C_ELEMENT_SIZE(0) |
1875                 S_008F0C_INDEX_STRIDE(0) |
1876                 S_008F0C_ADD_TID_ENABLE(false);
1877         desc += 4;
1878
1879         /* add sample positions after all rings */
1880         memcpy(desc, queue->device->sample_locations_1x, 8);
1881         desc += 2;
1882         memcpy(desc, queue->device->sample_locations_2x, 16);
1883         desc += 4;
1884         memcpy(desc, queue->device->sample_locations_4x, 32);
1885         desc += 8;
1886         memcpy(desc, queue->device->sample_locations_8x, 64);
1887         desc += 16;
1888         memcpy(desc, queue->device->sample_locations_16x, 128);
1889 }
1890
1891 static unsigned
1892 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1893 {
1894         bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1895                 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1896                 device->physical_device->rad_info.family != CHIP_STONEY;
1897         unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1898         unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1899                 device->physical_device->rad_info.max_se;
1900         unsigned offchip_granularity;
1901         unsigned hs_offchip_param;
1902         switch (device->tess_offchip_block_dw_size) {
1903         default:
1904                 assert(0);
1905                 /* fall through */
1906         case 8192:
1907                 offchip_granularity = V_03093C_X_8K_DWORDS;
1908                 break;
1909         case 4096:
1910                 offchip_granularity = V_03093C_X_4K_DWORDS;
1911                 break;
1912         }
1913
1914         switch (device->physical_device->rad_info.chip_class) {
1915         case SI:
1916                 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1917                 break;
1918         case CIK:
1919         case VI:
1920         case GFX9:
1921         default:
1922                 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1923                 break;
1924         }
1925
1926         *max_offchip_buffers_p = max_offchip_buffers;
1927         if (device->physical_device->rad_info.chip_class >= CIK) {
1928                 if (device->physical_device->rad_info.chip_class >= VI)
1929                         --max_offchip_buffers;
1930                 hs_offchip_param =
1931                         S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1932                         S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1933         } else {
1934                 hs_offchip_param =
1935                         S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1936         }
1937         return hs_offchip_param;
1938 }
1939
1940 static void
1941 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
1942                         struct radeon_winsys_bo *esgs_ring_bo,
1943                         uint32_t esgs_ring_size,
1944                         struct radeon_winsys_bo *gsvs_ring_bo,
1945                         uint32_t gsvs_ring_size)
1946 {
1947         if (!esgs_ring_bo && !gsvs_ring_bo)
1948                 return;
1949
1950         if (esgs_ring_bo)
1951                 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
1952
1953         if (gsvs_ring_bo)
1954                 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
1955
1956         if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1957                 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1958                 radeon_emit(cs, esgs_ring_size >> 8);
1959                 radeon_emit(cs, gsvs_ring_size >> 8);
1960         } else {
1961                 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1962                 radeon_emit(cs, esgs_ring_size >> 8);
1963                 radeon_emit(cs, gsvs_ring_size >> 8);
1964         }
1965 }
1966
1967 static void
1968 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
1969                            unsigned hs_offchip_param, unsigned tf_ring_size,
1970                            struct radeon_winsys_bo *tess_rings_bo)
1971 {
1972         uint64_t tf_va;
1973
1974         if (!tess_rings_bo)
1975                 return;
1976
1977         tf_va = radv_buffer_get_va(tess_rings_bo);
1978
1979         radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
1980
1981         if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1982                 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1983                                        S_030938_SIZE(tf_ring_size / 4));
1984                 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1985                                        tf_va >> 8);
1986                 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1987                         radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1988                                                S_030944_BASE_HI(tf_va >> 40));
1989                 }
1990                 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
1991                                        hs_offchip_param);
1992         } else {
1993                 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1994                                       S_008988_SIZE(tf_ring_size / 4));
1995                 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1996                                       tf_va >> 8);
1997                 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1998                                      hs_offchip_param);
1999         }
2000 }
2001
2002 static void
2003 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2004                           struct radeon_winsys_bo *compute_scratch_bo)
2005 {
2006         uint64_t scratch_va;
2007
2008         if (!compute_scratch_bo)
2009                 return;
2010
2011         scratch_va = radv_buffer_get_va(compute_scratch_bo);
2012
2013         radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
2014
2015         radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
2016         radeon_emit(cs, scratch_va);
2017         radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2018                         S_008F04_SWIZZLE_ENABLE(1));
2019 }
2020
2021 static void
2022 radv_emit_global_shader_pointers(struct radv_queue *queue,
2023                                  struct radeon_cmdbuf *cs,
2024                                  struct radeon_winsys_bo *descriptor_bo)
2025 {
2026         uint64_t va;
2027
2028         if (!descriptor_bo)
2029                 return;
2030
2031         va = radv_buffer_get_va(descriptor_bo);
2032
2033         radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
2034
2035         if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
2036                 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2037                                    R_00B130_SPI_SHADER_USER_DATA_VS_0,
2038                                    R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
2039                                    R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
2040
2041                 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2042                         radv_emit_shader_pointer(queue->device, cs, regs[i],
2043                                                  va, true);
2044                 }
2045         } else {
2046                 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2047                                    R_00B130_SPI_SHADER_USER_DATA_VS_0,
2048                                    R_00B230_SPI_SHADER_USER_DATA_GS_0,
2049                                    R_00B330_SPI_SHADER_USER_DATA_ES_0,
2050                                    R_00B430_SPI_SHADER_USER_DATA_HS_0,
2051                                    R_00B530_SPI_SHADER_USER_DATA_LS_0};
2052
2053                 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2054                         radv_emit_shader_pointer(queue->device, cs, regs[i],
2055                                                  va, true);
2056                 }
2057         }
2058 }
2059
2060 static VkResult
2061 radv_get_preamble_cs(struct radv_queue *queue,
2062                      uint32_t scratch_size,
2063                      uint32_t compute_scratch_size,
2064                      uint32_t esgs_ring_size,
2065                      uint32_t gsvs_ring_size,
2066                      bool needs_tess_rings,
2067                      bool needs_sample_positions,
2068                      struct radeon_cmdbuf **initial_full_flush_preamble_cs,
2069                      struct radeon_cmdbuf **initial_preamble_cs,
2070                      struct radeon_cmdbuf **continue_preamble_cs)
2071 {
2072         struct radeon_winsys_bo *scratch_bo = NULL;
2073         struct radeon_winsys_bo *descriptor_bo = NULL;
2074         struct radeon_winsys_bo *compute_scratch_bo = NULL;
2075         struct radeon_winsys_bo *esgs_ring_bo = NULL;
2076         struct radeon_winsys_bo *gsvs_ring_bo = NULL;
2077         struct radeon_winsys_bo *tess_rings_bo = NULL;
2078         struct radeon_cmdbuf *dest_cs[3] = {0};
2079         bool add_tess_rings = false, add_sample_positions = false;
2080         unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
2081         unsigned max_offchip_buffers;
2082         unsigned hs_offchip_param = 0;
2083         unsigned tess_offchip_ring_offset;
2084         uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
2085         if (!queue->has_tess_rings) {
2086                 if (needs_tess_rings)
2087                         add_tess_rings = true;
2088         }
2089         if (!queue->has_sample_positions) {
2090                 if (needs_sample_positions)
2091                         add_sample_positions = true;
2092         }
2093         tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
2094         hs_offchip_param = radv_get_hs_offchip_param(queue->device,
2095                                                      &max_offchip_buffers);
2096         tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
2097         tess_offchip_ring_size = max_offchip_buffers *
2098                 queue->device->tess_offchip_block_dw_size * 4;
2099
2100         if (scratch_size <= queue->scratch_size &&
2101             compute_scratch_size <= queue->compute_scratch_size &&
2102             esgs_ring_size <= queue->esgs_ring_size &&
2103             gsvs_ring_size <= queue->gsvs_ring_size &&
2104             !add_tess_rings && !add_sample_positions &&
2105             queue->initial_preamble_cs) {
2106                 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2107                 *initial_preamble_cs = queue->initial_preamble_cs;
2108                 *continue_preamble_cs = queue->continue_preamble_cs;
2109                 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2110                         *continue_preamble_cs = NULL;
2111                 return VK_SUCCESS;
2112         }
2113
2114         if (scratch_size > queue->scratch_size) {
2115                 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2116                                                               scratch_size,
2117                                                               4096,
2118                                                               RADEON_DOMAIN_VRAM,
2119                                                               ring_bo_flags);
2120                 if (!scratch_bo)
2121                         goto fail;
2122         } else
2123                 scratch_bo = queue->scratch_bo;
2124
2125         if (compute_scratch_size > queue->compute_scratch_size) {
2126                 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2127                                                                       compute_scratch_size,
2128                                                                       4096,
2129                                                                       RADEON_DOMAIN_VRAM,
2130                                                                       ring_bo_flags);
2131                 if (!compute_scratch_bo)
2132                         goto fail;
2133
2134         } else
2135                 compute_scratch_bo = queue->compute_scratch_bo;
2136
2137         if (esgs_ring_size > queue->esgs_ring_size) {
2138                 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2139                                                                 esgs_ring_size,
2140                                                                 4096,
2141                                                                 RADEON_DOMAIN_VRAM,
2142                                                                 ring_bo_flags);
2143                 if (!esgs_ring_bo)
2144                         goto fail;
2145         } else {
2146                 esgs_ring_bo = queue->esgs_ring_bo;
2147                 esgs_ring_size = queue->esgs_ring_size;
2148         }
2149
2150         if (gsvs_ring_size > queue->gsvs_ring_size) {
2151                 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2152                                                                 gsvs_ring_size,
2153                                                                 4096,
2154                                                                 RADEON_DOMAIN_VRAM,
2155                                                                 ring_bo_flags);
2156                 if (!gsvs_ring_bo)
2157                         goto fail;
2158         } else {
2159                 gsvs_ring_bo = queue->gsvs_ring_bo;
2160                 gsvs_ring_size = queue->gsvs_ring_size;
2161         }
2162
2163         if (add_tess_rings) {
2164                 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
2165                                                                  tess_offchip_ring_offset + tess_offchip_ring_size,
2166                                                                  256,
2167                                                                  RADEON_DOMAIN_VRAM,
2168                                                                  ring_bo_flags);
2169                 if (!tess_rings_bo)
2170                         goto fail;
2171         } else {
2172                 tess_rings_bo = queue->tess_rings_bo;
2173         }
2174
2175         if (scratch_bo != queue->scratch_bo ||
2176             esgs_ring_bo != queue->esgs_ring_bo ||
2177             gsvs_ring_bo != queue->gsvs_ring_bo ||
2178             tess_rings_bo != queue->tess_rings_bo ||
2179             add_sample_positions) {
2180                 uint32_t size = 0;
2181                 if (gsvs_ring_bo || esgs_ring_bo ||
2182                     tess_rings_bo || add_sample_positions) {
2183                         size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
2184                         if (add_sample_positions)
2185                                 size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
2186                 }
2187                 else if (scratch_bo)
2188                         size = 8; /* 2 dword */
2189
2190                 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
2191                                                                  size,
2192                                                                  4096,
2193                                                                  RADEON_DOMAIN_VRAM,
2194                                                                  RADEON_FLAG_CPU_ACCESS |
2195                                                                  RADEON_FLAG_NO_INTERPROCESS_SHARING |
2196                                                                  RADEON_FLAG_READ_ONLY);
2197                 if (!descriptor_bo)
2198                         goto fail;
2199         } else
2200                 descriptor_bo = queue->descriptor_bo;
2201
2202         for(int i = 0; i < 3; ++i) {
2203                 struct radeon_cmdbuf *cs = NULL;
2204                 cs = queue->device->ws->cs_create(queue->device->ws,
2205                                                   queue->queue_family_index ? RING_COMPUTE : RING_GFX);
2206                 if (!cs)
2207                         goto fail;
2208
2209                 dest_cs[i] = cs;
2210
2211                 if (scratch_bo)
2212                         radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
2213
2214                 if (descriptor_bo != queue->descriptor_bo) {
2215                         uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
2216
2217                         if (scratch_bo) {
2218                                 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
2219                                 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2220                                                  S_008F04_SWIZZLE_ENABLE(1);
2221                                 map[0] = scratch_va;
2222                                 map[1] = rsrc1;
2223                         }
2224
2225                         if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo ||
2226                             add_sample_positions)
2227                                 fill_geom_tess_rings(queue, map, add_sample_positions,
2228                                                      esgs_ring_size, esgs_ring_bo,
2229                                                      gsvs_ring_size, gsvs_ring_bo,
2230                                                      tess_factor_ring_size,
2231                                                      tess_offchip_ring_offset,
2232                                                      tess_offchip_ring_size,
2233                                                      tess_rings_bo);
2234
2235                         queue->device->ws->buffer_unmap(descriptor_bo);
2236                 }
2237
2238                 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo)  {
2239                         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2240                         radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
2241                         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2242                         radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
2243                 }
2244
2245                 radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
2246                                         gsvs_ring_bo, gsvs_ring_size);
2247                 radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
2248                                            tess_factor_ring_size, tess_rings_bo);
2249                 radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
2250                 radv_emit_compute_scratch(queue, cs, compute_scratch_bo);
2251
2252                 if (i == 0) {
2253                         si_cs_emit_cache_flush(cs,
2254                                                queue->device->physical_device->rad_info.chip_class,
2255                                                NULL, 0,
2256                                                queue->queue_family_index == RING_COMPUTE &&
2257                                                  queue->device->physical_device->rad_info.chip_class >= CIK,
2258                                                (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
2259                                                RADV_CMD_FLAG_INV_ICACHE |
2260                                                RADV_CMD_FLAG_INV_SMEM_L1 |
2261                                                RADV_CMD_FLAG_INV_VMEM_L1 |
2262                                                RADV_CMD_FLAG_INV_GLOBAL_L2 |
2263                                                RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2264                 } else if (i == 1) {
2265                         si_cs_emit_cache_flush(cs,
2266                                                queue->device->physical_device->rad_info.chip_class,
2267                                                NULL, 0,
2268                                                queue->queue_family_index == RING_COMPUTE &&
2269                                                  queue->device->physical_device->rad_info.chip_class >= CIK,
2270                                                RADV_CMD_FLAG_INV_ICACHE |
2271                                                RADV_CMD_FLAG_INV_SMEM_L1 |
2272                                                RADV_CMD_FLAG_INV_VMEM_L1 |
2273                                                RADV_CMD_FLAG_INV_GLOBAL_L2 |
2274                                                RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2275                 }
2276
2277                 if (!queue->device->ws->cs_finalize(cs))
2278                         goto fail;
2279         }
2280
2281         if (queue->initial_full_flush_preamble_cs)
2282                         queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2283
2284         if (queue->initial_preamble_cs)
2285                         queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2286
2287         if (queue->continue_preamble_cs)
2288                         queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2289
2290         queue->initial_full_flush_preamble_cs = dest_cs[0];
2291         queue->initial_preamble_cs = dest_cs[1];
2292         queue->continue_preamble_cs = dest_cs[2];
2293
2294         if (scratch_bo != queue->scratch_bo) {
2295                 if (queue->scratch_bo)
2296                         queue->device->ws->buffer_destroy(queue->scratch_bo);
2297                 queue->scratch_bo = scratch_bo;
2298                 queue->scratch_size = scratch_size;
2299         }
2300
2301         if (compute_scratch_bo != queue->compute_scratch_bo) {
2302                 if (queue->compute_scratch_bo)
2303                         queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2304                 queue->compute_scratch_bo = compute_scratch_bo;
2305                 queue->compute_scratch_size = compute_scratch_size;
2306         }
2307
2308         if (esgs_ring_bo != queue->esgs_ring_bo) {
2309                 if (queue->esgs_ring_bo)
2310                         queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2311                 queue->esgs_ring_bo = esgs_ring_bo;
2312                 queue->esgs_ring_size = esgs_ring_size;
2313         }
2314
2315         if (gsvs_ring_bo != queue->gsvs_ring_bo) {
2316                 if (queue->gsvs_ring_bo)
2317                         queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2318                 queue->gsvs_ring_bo = gsvs_ring_bo;
2319                 queue->gsvs_ring_size = gsvs_ring_size;
2320         }
2321
2322         if (tess_rings_bo != queue->tess_rings_bo) {
2323                 queue->tess_rings_bo = tess_rings_bo;
2324                 queue->has_tess_rings = true;
2325         }
2326
2327         if (descriptor_bo != queue->descriptor_bo) {
2328                 if (queue->descriptor_bo)
2329                         queue->device->ws->buffer_destroy(queue->descriptor_bo);
2330
2331                 queue->descriptor_bo = descriptor_bo;
2332         }
2333
2334         if (add_sample_positions)
2335                 queue->has_sample_positions = true;
2336
2337         *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2338         *initial_preamble_cs = queue->initial_preamble_cs;
2339         *continue_preamble_cs = queue->continue_preamble_cs;
2340         if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2341                         *continue_preamble_cs = NULL;
2342         return VK_SUCCESS;
2343 fail:
2344         for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2345                 if (dest_cs[i])
2346                         queue->device->ws->cs_destroy(dest_cs[i]);
2347         if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2348                 queue->device->ws->buffer_destroy(descriptor_bo);
2349         if (scratch_bo && scratch_bo != queue->scratch_bo)
2350                 queue->device->ws->buffer_destroy(scratch_bo);
2351         if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2352                 queue->device->ws->buffer_destroy(compute_scratch_bo);
2353         if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2354                 queue->device->ws->buffer_destroy(esgs_ring_bo);
2355         if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2356                 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2357         if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2358                 queue->device->ws->buffer_destroy(tess_rings_bo);
2359         return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2360 }
2361
2362 static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
2363                                       struct radv_winsys_sem_counts *counts,
2364                                       int num_sems,
2365                                       const VkSemaphore *sems,
2366                                       VkFence _fence,
2367                                       bool reset_temp)
2368 {
2369         int syncobj_idx = 0, sem_idx = 0;
2370
2371         if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2372                 return VK_SUCCESS;
2373
2374         for (uint32_t i = 0; i < num_sems; i++) {
2375                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2376
2377                 if (sem->temp_syncobj || sem->syncobj)
2378                         counts->syncobj_count++;
2379                 else
2380                         counts->sem_count++;
2381         }
2382
2383         if (_fence != VK_NULL_HANDLE) {
2384                 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2385                 if (fence->temp_syncobj || fence->syncobj)
2386                         counts->syncobj_count++;
2387         }
2388
2389         if (counts->syncobj_count) {
2390                 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2391                 if (!counts->syncobj)
2392                         return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2393         }
2394
2395         if (counts->sem_count) {
2396                 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2397                 if (!counts->sem) {
2398                         free(counts->syncobj);
2399                         return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2400                 }
2401         }
2402
2403         for (uint32_t i = 0; i < num_sems; i++) {
2404                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2405
2406                 if (sem->temp_syncobj) {
2407                         counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2408                 }
2409                 else if (sem->syncobj)
2410                         counts->syncobj[syncobj_idx++] = sem->syncobj;
2411                 else {
2412                         assert(sem->sem);
2413                         counts->sem[sem_idx++] = sem->sem;
2414                 }
2415         }
2416
2417         if (_fence != VK_NULL_HANDLE) {
2418                 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2419                 if (fence->temp_syncobj)
2420                         counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2421                 else if (fence->syncobj)
2422                         counts->syncobj[syncobj_idx++] = fence->syncobj;
2423         }
2424
2425         return VK_SUCCESS;
2426 }
2427
2428 static void
2429 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2430 {
2431         free(sem_info->wait.syncobj);
2432         free(sem_info->wait.sem);
2433         free(sem_info->signal.syncobj);
2434         free(sem_info->signal.sem);
2435 }
2436
2437
2438 static void radv_free_temp_syncobjs(struct radv_device *device,
2439                                     int num_sems,
2440                                     const VkSemaphore *sems)
2441 {
2442         for (uint32_t i = 0; i < num_sems; i++) {
2443                 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2444
2445                 if (sem->temp_syncobj) {
2446                         device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2447                         sem->temp_syncobj = 0;
2448                 }
2449         }
2450 }
2451
2452 static VkResult
2453 radv_alloc_sem_info(struct radv_instance *instance,
2454                     struct radv_winsys_sem_info *sem_info,
2455                     int num_wait_sems,
2456                     const VkSemaphore *wait_sems,
2457                     int num_signal_sems,
2458                     const VkSemaphore *signal_sems,
2459                     VkFence fence)
2460 {
2461         VkResult ret;
2462         memset(sem_info, 0, sizeof(*sem_info));
2463
2464         ret = radv_alloc_sem_counts(instance, &sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
2465         if (ret)
2466                 return ret;
2467         ret = radv_alloc_sem_counts(instance, &sem_info->signal, num_signal_sems, signal_sems, fence, false);
2468         if (ret)
2469                 radv_free_sem_info(sem_info);
2470
2471         /* caller can override these */
2472         sem_info->cs_emit_wait = true;
2473         sem_info->cs_emit_signal = true;
2474         return ret;
2475 }
2476
2477 /* Signals fence as soon as all the work currently put on queue is done. */
2478 static VkResult radv_signal_fence(struct radv_queue *queue,
2479                               struct radv_fence *fence)
2480 {
2481         int ret;
2482         VkResult result;
2483         struct radv_winsys_sem_info sem_info;
2484
2485         result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL,
2486                                      radv_fence_to_handle(fence));
2487         if (result != VK_SUCCESS)
2488                 return result;
2489
2490         ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2491                                            &queue->device->empty_cs[queue->queue_family_index],
2492                                            1, NULL, NULL, &sem_info, NULL,
2493                                            false, fence->fence);
2494         radv_free_sem_info(&sem_info);
2495
2496         if (ret)
2497                 return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
2498
2499         return VK_SUCCESS;
2500 }
2501
2502 VkResult radv_QueueSubmit(
2503         VkQueue                                     _queue,
2504         uint32_t                                    submitCount,
2505         const VkSubmitInfo*                         pSubmits,
2506         VkFence                                     _fence)
2507 {
2508         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2509         RADV_FROM_HANDLE(radv_fence, fence, _fence);
2510         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2511         struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2512         int ret;
2513         uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
2514         uint32_t scratch_size = 0;
2515         uint32_t compute_scratch_size = 0;
2516         uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2517         struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2518         VkResult result;
2519         bool fence_emitted = false;
2520         bool tess_rings_needed = false;
2521         bool sample_positions_needed = false;
2522
2523         /* Do this first so failing to allocate scratch buffers can't result in
2524          * partially executed submissions. */
2525         for (uint32_t i = 0; i < submitCount; i++) {
2526                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2527                         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2528                                          pSubmits[i].pCommandBuffers[j]);
2529
2530                         scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2531                         compute_scratch_size = MAX2(compute_scratch_size,
2532                                                     cmd_buffer->compute_scratch_size_needed);
2533                         esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2534                         gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2535                         tess_rings_needed |= cmd_buffer->tess_rings_needed;
2536                         sample_positions_needed |= cmd_buffer->sample_positions_needed;
2537                 }
2538         }
2539
2540         result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2541                                       esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2542                                       sample_positions_needed, &initial_flush_preamble_cs,
2543                                       &initial_preamble_cs, &continue_preamble_cs);
2544         if (result != VK_SUCCESS)
2545                 return result;
2546
2547         for (uint32_t i = 0; i < submitCount; i++) {
2548                 struct radeon_cmdbuf **cs_array;
2549                 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2550                 bool can_patch = true;
2551                 uint32_t advance;
2552                 struct radv_winsys_sem_info sem_info;
2553
2554                 result = radv_alloc_sem_info(queue->device->instance,
2555                                              &sem_info,
2556                                              pSubmits[i].waitSemaphoreCount,
2557                                              pSubmits[i].pWaitSemaphores,
2558                                              pSubmits[i].signalSemaphoreCount,
2559                                              pSubmits[i].pSignalSemaphores,
2560                                              _fence);
2561                 if (result != VK_SUCCESS)
2562                         return result;
2563
2564                 if (!pSubmits[i].commandBufferCount) {
2565                         if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2566                                 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2567                                                                    &queue->device->empty_cs[queue->queue_family_index],
2568                                                                    1, NULL, NULL,
2569                                                                    &sem_info, NULL,
2570                                                                    false, base_fence);
2571                                 if (ret) {
2572                                         radv_loge("failed to submit CS %d\n", i);
2573                                         abort();
2574                                 }
2575                                 fence_emitted = true;
2576                         }
2577                         radv_free_sem_info(&sem_info);
2578                         continue;
2579                 }
2580
2581                 cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
2582                                                 (pSubmits[i].commandBufferCount));
2583
2584                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2585                         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2586                                          pSubmits[i].pCommandBuffers[j]);
2587                         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2588
2589                         cs_array[j] = cmd_buffer->cs;
2590                         if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2591                                 can_patch = false;
2592
2593                         cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2594                 }
2595
2596                 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2597                         struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2598                         const struct radv_winsys_bo_list *bo_list = NULL;
2599
2600                         advance = MIN2(max_cs_submission,
2601                                        pSubmits[i].commandBufferCount - j);
2602
2603                         if (queue->device->trace_bo)
2604                                 *queue->device->trace_id_ptr = 0;
2605
2606                         sem_info.cs_emit_wait = j == 0;
2607                         sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2608
2609                         if (unlikely(queue->device->use_global_bo_list)) {
2610                                 pthread_mutex_lock(&queue->device->bo_list.mutex);
2611                                 bo_list = &queue->device->bo_list.list;
2612                         }
2613
2614                         ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2615                                                         advance, initial_preamble, continue_preamble_cs,
2616                                                         &sem_info, bo_list,
2617                                                         can_patch, base_fence);
2618
2619                         if (unlikely(queue->device->use_global_bo_list))
2620                                 pthread_mutex_unlock(&queue->device->bo_list.mutex);
2621
2622                         if (ret) {
2623                                 radv_loge("failed to submit CS %d\n", i);
2624                                 abort();
2625                         }
2626                         fence_emitted = true;
2627                         if (queue->device->trace_bo) {
2628                                 radv_check_gpu_hangs(queue, cs_array[j]);
2629                         }
2630                 }
2631
2632                 radv_free_temp_syncobjs(queue->device,
2633                                         pSubmits[i].waitSemaphoreCount,
2634                                         pSubmits[i].pWaitSemaphores);
2635                 radv_free_sem_info(&sem_info);
2636                 free(cs_array);
2637         }
2638
2639         if (fence) {
2640                 if (!fence_emitted) {
2641                         result = radv_signal_fence(queue, fence);
2642                         if (result != VK_SUCCESS)
2643                                 return result;
2644                 }
2645                 fence->submitted = true;
2646         }
2647
2648         return VK_SUCCESS;
2649 }
2650
2651 VkResult radv_QueueWaitIdle(
2652         VkQueue                                     _queue)
2653 {
2654         RADV_FROM_HANDLE(radv_queue, queue, _queue);
2655
2656         queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2657                                          radv_queue_family_to_ring(queue->queue_family_index),
2658                                          queue->queue_idx);
2659         return VK_SUCCESS;
2660 }
2661
2662 VkResult radv_DeviceWaitIdle(
2663         VkDevice                                    _device)
2664 {
2665         RADV_FROM_HANDLE(radv_device, device, _device);
2666
2667         for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2668                 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2669                         radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2670                 }
2671         }
2672         return VK_SUCCESS;
2673 }
2674
2675 VkResult radv_EnumerateInstanceExtensionProperties(
2676     const char*                                 pLayerName,
2677     uint32_t*                                   pPropertyCount,
2678     VkExtensionProperties*                      pProperties)
2679 {
2680         VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2681
2682         for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
2683                 if (radv_supported_instance_extensions.extensions[i]) {
2684                         vk_outarray_append(&out, prop) {
2685                                 *prop = radv_instance_extensions[i];
2686                         }
2687                 }
2688         }
2689
2690         return vk_outarray_status(&out);
2691 }
2692
2693 VkResult radv_EnumerateDeviceExtensionProperties(
2694     VkPhysicalDevice                            physicalDevice,
2695     const char*                                 pLayerName,
2696     uint32_t*                                   pPropertyCount,
2697     VkExtensionProperties*                      pProperties)
2698 {
2699         RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2700         VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2701
2702         for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
2703                 if (device->supported_extensions.extensions[i]) {
2704                         vk_outarray_append(&out, prop) {
2705                                 *prop = radv_device_extensions[i];
2706                         }
2707                 }
2708         }
2709
2710         return vk_outarray_status(&out);
2711 }
2712
2713 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2714         VkInstance                                  _instance,
2715         const char*                                 pName)
2716 {
2717         RADV_FROM_HANDLE(radv_instance, instance, _instance);
2718
2719         return radv_lookup_entrypoint_checked(pName,
2720                                               instance ? instance->apiVersion : 0,
2721                                               instance ? &instance->enabled_extensions : NULL,
2722                                               NULL);
2723 }
2724
2725 /* The loader wants us to expose a second GetInstanceProcAddr function
2726  * to work around certain LD_PRELOAD issues seen in apps.
2727  */
2728 PUBLIC
2729 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2730         VkInstance                                  instance,
2731         const char*                                 pName);
2732
2733 PUBLIC
2734 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2735         VkInstance                                  instance,
2736         const char*                                 pName)
2737 {
2738         return radv_GetInstanceProcAddr(instance, pName);
2739 }
2740
2741 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2742         VkDevice                                    _device,
2743         const char*                                 pName)
2744 {
2745         RADV_FROM_HANDLE(radv_device, device, _device);
2746
2747         return radv_lookup_entrypoint_checked(pName,
2748                                               device->instance->apiVersion,
2749                                               &device->instance->enabled_extensions,
2750                                               &device->enabled_extensions);
2751 }
2752
2753 bool radv_get_memory_fd(struct radv_device *device,
2754                         struct radv_device_memory *memory,
2755                         int *pFD)
2756 {
2757         struct radeon_bo_metadata metadata;
2758
2759         if (memory->image) {
2760                 radv_init_metadata(device, memory->image, &metadata);
2761                 device->ws->buffer_set_metadata(memory->bo, &metadata);
2762         }
2763
2764         return device->ws->buffer_get_fd(device->ws, memory->bo,
2765                                          pFD);
2766 }
2767
2768 static VkResult radv_alloc_memory(struct radv_device *device,
2769                                   const VkMemoryAllocateInfo*     pAllocateInfo,
2770                                   const VkAllocationCallbacks*    pAllocator,
2771                                   VkDeviceMemory*                 pMem)
2772 {
2773         struct radv_device_memory *mem;
2774         VkResult result;
2775         enum radeon_bo_domain domain;
2776         uint32_t flags = 0;
2777         enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2778
2779         assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2780
2781         if (pAllocateInfo->allocationSize == 0) {
2782                 /* Apparently, this is allowed */
2783                 *pMem = VK_NULL_HANDLE;
2784                 return VK_SUCCESS;
2785         }
2786
2787         const VkImportMemoryFdInfoKHR *import_info =
2788                 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2789         const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2790                 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2791         const VkExportMemoryAllocateInfoKHR *export_info =
2792                 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
2793         const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
2794                 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
2795
2796         const struct wsi_memory_allocate_info *wsi_info =
2797                 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2798
2799         mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2800                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2801         if (mem == NULL)
2802                 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2803
2804         if (wsi_info && wsi_info->implicit_sync)
2805                 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2806
2807         if (dedicate_info) {
2808                 mem->image = radv_image_from_handle(dedicate_info->image);
2809                 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2810         } else {
2811                 mem->image = NULL;
2812                 mem->buffer = NULL;
2813         }
2814
2815         mem->user_ptr = NULL;
2816
2817         if (import_info) {
2818                 assert(import_info->handleType ==
2819                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2820                        import_info->handleType ==
2821                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2822                 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2823                                                      NULL, NULL);
2824                 if (!mem->bo) {
2825                         result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2826                         goto fail;
2827                 } else {
2828                         close(import_info->fd);
2829                 }
2830         } else if (host_ptr_info) {
2831                 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
2832                 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
2833                 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
2834                                                       pAllocateInfo->allocationSize);
2835                 if (!mem->bo) {
2836                         result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2837                         goto fail;
2838                 } else {
2839                         mem->user_ptr = host_ptr_info->pHostPointer;
2840                 }
2841         } else {
2842                 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2843                 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2844                     mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2845                         domain = RADEON_DOMAIN_GTT;
2846                 else
2847                         domain = RADEON_DOMAIN_VRAM;
2848
2849                 if (mem_type_index == RADV_MEM_TYPE_VRAM)
2850                         flags |= RADEON_FLAG_NO_CPU_ACCESS;
2851                 else
2852                         flags |= RADEON_FLAG_CPU_ACCESS;
2853
2854                 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2855                         flags |= RADEON_FLAG_GTT_WC;
2856
2857                 if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
2858                         flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2859
2860                 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2861                                                     domain, flags);
2862
2863                 if (!mem->bo) {
2864                         result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2865                         goto fail;
2866                 }
2867                 mem->type_index = mem_type_index;
2868         }
2869
2870         result = radv_bo_list_add(device, mem->bo);
2871         if (result != VK_SUCCESS)
2872                 goto fail_bo;
2873
2874         *pMem = radv_device_memory_to_handle(mem);
2875
2876         return VK_SUCCESS;
2877
2878 fail_bo:
2879         device->ws->buffer_destroy(mem->bo);
2880 fail:
2881         vk_free2(&device->alloc, pAllocator, mem);
2882
2883         return result;
2884 }
2885
2886 VkResult radv_AllocateMemory(
2887         VkDevice                                    _device,
2888         const VkMemoryAllocateInfo*                 pAllocateInfo,
2889         const VkAllocationCallbacks*                pAllocator,
2890         VkDeviceMemory*                             pMem)
2891 {
2892         RADV_FROM_HANDLE(radv_device, device, _device);
2893         return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
2894 }
2895
2896 void radv_FreeMemory(
2897         VkDevice                                    _device,
2898         VkDeviceMemory                              _mem,
2899         const VkAllocationCallbacks*                pAllocator)
2900 {
2901         RADV_FROM_HANDLE(radv_device, device, _device);
2902         RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2903
2904         if (mem == NULL)
2905                 return;
2906
2907         radv_bo_list_remove(device, mem->bo);
2908         device->ws->buffer_destroy(mem->bo);
2909         mem->bo = NULL;
2910
2911         vk_free2(&device->alloc, pAllocator, mem);
2912 }
2913
2914 VkResult radv_MapMemory(
2915         VkDevice                                    _device,
2916         VkDeviceMemory                              _memory,
2917         VkDeviceSize                                offset,
2918         VkDeviceSize                                size,
2919         VkMemoryMapFlags                            flags,
2920         void**                                      ppData)
2921 {
2922         RADV_FROM_HANDLE(radv_device, device, _device);
2923         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2924
2925         if (mem == NULL) {
2926                 *ppData = NULL;
2927                 return VK_SUCCESS;
2928         }
2929
2930         if (mem->user_ptr)
2931                 *ppData = mem->user_ptr;
2932         else
2933                 *ppData = device->ws->buffer_map(mem->bo);
2934
2935         if (*ppData) {
2936                 *ppData += offset;
2937                 return VK_SUCCESS;
2938         }
2939
2940         return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
2941 }
2942
2943 void radv_UnmapMemory(
2944         VkDevice                                    _device,
2945         VkDeviceMemory                              _memory)
2946 {
2947         RADV_FROM_HANDLE(radv_device, device, _device);
2948         RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2949
2950         if (mem == NULL)
2951                 return;
2952
2953         if (mem->user_ptr == NULL)
2954                 device->ws->buffer_unmap(mem->bo);
2955 }
2956
2957 VkResult radv_FlushMappedMemoryRanges(
2958         VkDevice                                    _device,
2959         uint32_t                                    memoryRangeCount,
2960         const VkMappedMemoryRange*                  pMemoryRanges)
2961 {
2962         return VK_SUCCESS;
2963 }
2964
2965 VkResult radv_InvalidateMappedMemoryRanges(
2966         VkDevice                                    _device,
2967         uint32_t                                    memoryRangeCount,
2968         const VkMappedMemoryRange*                  pMemoryRanges)
2969 {
2970         return VK_SUCCESS;
2971 }
2972
2973 void radv_GetBufferMemoryRequirements(
2974         VkDevice                                    _device,
2975         VkBuffer                                    _buffer,
2976         VkMemoryRequirements*                       pMemoryRequirements)
2977 {
2978         RADV_FROM_HANDLE(radv_device, device, _device);
2979         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2980
2981         pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2982
2983         if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2984                 pMemoryRequirements->alignment = 4096;
2985         else
2986                 pMemoryRequirements->alignment = 16;
2987
2988         pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2989 }
2990
2991 void radv_GetBufferMemoryRequirements2(
2992         VkDevice                                     device,
2993         const VkBufferMemoryRequirementsInfo2KHR*    pInfo,
2994         VkMemoryRequirements2KHR*                    pMemoryRequirements)
2995 {
2996         radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2997                                         &pMemoryRequirements->memoryRequirements);
2998         RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2999         vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3000                 switch (ext->sType) {
3001                 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
3002                         VkMemoryDedicatedRequirementsKHR *req =
3003                                        (VkMemoryDedicatedRequirementsKHR *) ext;
3004                         req->requiresDedicatedAllocation = buffer->shareable;
3005                         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3006                         break;
3007                 }
3008                 default:
3009                         break;
3010                 }
3011         }
3012 }
3013
3014 void radv_GetImageMemoryRequirements(
3015         VkDevice                                    _device,
3016         VkImage                                     _image,
3017         VkMemoryRequirements*                       pMemoryRequirements)
3018 {
3019         RADV_FROM_HANDLE(radv_device, device, _device);
3020         RADV_FROM_HANDLE(radv_image, image, _image);
3021
3022         pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3023
3024         pMemoryRequirements->size = image->size;
3025         pMemoryRequirements->alignment = image->alignment;
3026 }
3027
3028 void radv_GetImageMemoryRequirements2(
3029         VkDevice                                    device,
3030         const VkImageMemoryRequirementsInfo2KHR*    pInfo,
3031         VkMemoryRequirements2KHR*                   pMemoryRequirements)
3032 {
3033         radv_GetImageMemoryRequirements(device, pInfo->image,
3034                                         &pMemoryRequirements->memoryRequirements);
3035
3036         RADV_FROM_HANDLE(radv_image, image, pInfo->image);
3037
3038         vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3039                 switch (ext->sType) {
3040                 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
3041                         VkMemoryDedicatedRequirementsKHR *req =
3042                                        (VkMemoryDedicatedRequirementsKHR *) ext;
3043                         req->requiresDedicatedAllocation = image->shareable;
3044                         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3045                         break;
3046                 }
3047                 default:
3048                         break;
3049                 }
3050         }
3051 }
3052
3053 void radv_GetImageSparseMemoryRequirements(
3054         VkDevice                                    device,
3055         VkImage                                     image,
3056         uint32_t*                                   pSparseMemoryRequirementCount,
3057         VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
3058 {
3059         stub();
3060 }
3061
3062 void radv_GetImageSparseMemoryRequirements2(
3063         VkDevice                                    device,
3064         const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
3065         uint32_t*                                   pSparseMemoryRequirementCount,
3066         VkSparseImageMemoryRequirements2KHR*            pSparseMemoryRequirements)
3067 {
3068         stub();
3069 }
3070
3071 void radv_GetDeviceMemoryCommitment(
3072         VkDevice                                    device,
3073         VkDeviceMemory                              memory,
3074         VkDeviceSize*                               pCommittedMemoryInBytes)
3075 {
3076         *pCommittedMemoryInBytes = 0;
3077 }
3078
3079 VkResult radv_BindBufferMemory2(VkDevice device,
3080                                 uint32_t bindInfoCount,
3081                                 const VkBindBufferMemoryInfoKHR *pBindInfos)
3082 {
3083         for (uint32_t i = 0; i < bindInfoCount; ++i) {
3084                 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3085                 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
3086
3087                 if (mem) {
3088                         buffer->bo = mem->bo;
3089                         buffer->offset = pBindInfos[i].memoryOffset;
3090                 } else {
3091                         buffer->bo = NULL;
3092                 }
3093         }
3094         return VK_SUCCESS;
3095 }
3096
3097 VkResult radv_BindBufferMemory(
3098         VkDevice                                    device,
3099         VkBuffer                                    buffer,
3100         VkDeviceMemory                              memory,
3101         VkDeviceSize                                memoryOffset)
3102 {
3103         const VkBindBufferMemoryInfoKHR info = {
3104                 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
3105                 .buffer = buffer,
3106                 .memory = memory,
3107                 .memoryOffset = memoryOffset
3108         };
3109
3110         return radv_BindBufferMemory2(device, 1, &info);
3111 }
3112
3113 VkResult radv_BindImageMemory2(VkDevice device,
3114                                uint32_t bindInfoCount,
3115                                const VkBindImageMemoryInfoKHR *pBindInfos)
3116 {
3117         for (uint32_t i = 0; i < bindInfoCount; ++i) {
3118                 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3119                 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
3120
3121                 if (mem) {
3122                         image->bo = mem->bo;
3123                         image->offset = pBindInfos[i].memoryOffset;
3124                 } else {
3125                         image->bo = NULL;
3126                         image->offset = 0;
3127                 }
3128         }
3129         return VK_SUCCESS;
3130 }
3131
3132
3133 VkResult radv_BindImageMemory(
3134         VkDevice                                    device,
3135         VkImage                                     image,
3136         VkDeviceMemory                              memory,
3137         VkDeviceSize                                memoryOffset)
3138 {
3139         const VkBindImageMemoryInfoKHR info = {
3140                 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
3141                 .image = image,
3142                 .memory = memory,
3143                 .memoryOffset = memoryOffset
3144         };
3145
3146         return radv_BindImageMemory2(device, 1, &info);
3147 }
3148
3149
3150 static void
3151 radv_sparse_buffer_bind_memory(struct radv_device *device,
3152                                const VkSparseBufferMemoryBindInfo *bind)
3153 {
3154         RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
3155
3156         for (uint32_t i = 0; i < bind->bindCount; ++i) {
3157                 struct radv_device_memory *mem = NULL;
3158
3159                 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3160                         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3161
3162                 device->ws->buffer_virtual_bind(buffer->bo,
3163                                                 bind->pBinds[i].resourceOffset,
3164                                                 bind->pBinds[i].size,
3165                                                 mem ? mem->bo : NULL,
3166                                                 bind->pBinds[i].memoryOffset);
3167         }
3168 }
3169
3170 static void
3171 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
3172                                      const VkSparseImageOpaqueMemoryBindInfo *bind)
3173 {
3174         RADV_FROM_HANDLE(radv_image, image, bind->image);
3175
3176         for (uint32_t i = 0; i < bind->bindCount; ++i) {
3177                 struct radv_device_memory *mem = NULL;
3178
3179                 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3180                         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3181
3182                 device->ws->buffer_virtual_bind(image->bo,
3183                                                 bind->pBinds[i].resourceOffset,
3184                                                 bind->pBinds[i].size,
3185                                                 mem ? mem->bo : NULL,
3186                                                 bind->pBinds[i].memoryOffset);
3187         }
3188 }
3189
3190  VkResult radv_QueueBindSparse(
3191         VkQueue                                     _queue,
3192         uint32_t                                    bindInfoCount,
3193         const VkBindSparseInfo*                     pBindInfo,
3194         VkFence                                     _fence)
3195 {
3196         RADV_FROM_HANDLE(radv_fence, fence, _fence);
3197         RADV_FROM_HANDLE(radv_queue, queue, _queue);
3198         struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
3199         bool fence_emitted = false;
3200         VkResult result;
3201         int ret;
3202
3203         for (uint32_t i = 0; i < bindInfoCount; ++i) {
3204                 struct radv_winsys_sem_info sem_info;
3205                 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
3206                         radv_sparse_buffer_bind_memory(queue->device,
3207                                                        pBindInfo[i].pBufferBinds + j);
3208                 }
3209
3210                 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
3211                         radv_sparse_image_opaque_bind_memory(queue->device,
3212                                                              pBindInfo[i].pImageOpaqueBinds + j);
3213                 }
3214
3215                 VkResult result;
3216                 result = radv_alloc_sem_info(queue->device->instance,
3217                                              &sem_info,
3218                                              pBindInfo[i].waitSemaphoreCount,
3219                                              pBindInfo[i].pWaitSemaphores,
3220                                              pBindInfo[i].signalSemaphoreCount,
3221                                              pBindInfo[i].pSignalSemaphores,
3222                                              _fence);
3223                 if (result != VK_SUCCESS)
3224                         return result;
3225
3226                 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
3227                         ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
3228                                                           &queue->device->empty_cs[queue->queue_family_index],
3229                                                           1, NULL, NULL,
3230                                                           &sem_info, NULL,
3231                                                           false, base_fence);
3232                         if (ret) {
3233                                 radv_loge("failed to submit CS %d\n", i);
3234                                 abort();
3235                         }
3236
3237                         fence_emitted = true;
3238                         if (fence)
3239                                 fence->submitted = true;
3240                 }
3241
3242                 radv_free_sem_info(&sem_info);
3243
3244         }
3245
3246         if (fence) {
3247                 if (!fence_emitted) {
3248                         result = radv_signal_fence(queue, fence);
3249                         if (result != VK_SUCCESS)
3250                                 return result;
3251                 }
3252                 fence->submitted = true;
3253         }
3254
3255         return VK_SUCCESS;
3256 }
3257
3258 VkResult radv_CreateFence(
3259         VkDevice                                    _device,
3260         const VkFenceCreateInfo*                    pCreateInfo,
3261         const VkAllocationCallbacks*                pAllocator,
3262         VkFence*                                    pFence)
3263 {
3264         RADV_FROM_HANDLE(radv_device, device, _device);
3265         const VkExportFenceCreateInfoKHR *export =
3266                 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
3267         VkExternalFenceHandleTypeFlagsKHR handleTypes =
3268                 export ? export->handleTypes : 0;
3269
3270         struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
3271                                                sizeof(*fence), 8,
3272                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3273
3274         if (!fence)
3275                 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3276
3277         fence->fence_wsi = NULL;
3278         fence->submitted = false;
3279         fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
3280         fence->temp_syncobj = 0;
3281         if (device->always_use_syncobj || handleTypes) {
3282                 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
3283                 if (ret) {
3284                         vk_free2(&device->alloc, pAllocator, fence);
3285                         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3286                 }
3287                 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
3288                         device->ws->signal_syncobj(device->ws, fence->syncobj);
3289                 }
3290                 fence->fence = NULL;
3291         } else {
3292                 fence->fence = device->ws->create_fence();
3293                 if (!fence->fence) {
3294                         vk_free2(&device->alloc, pAllocator, fence);
3295                         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3296                 }
3297                 fence->syncobj = 0;
3298         }
3299
3300         *pFence = radv_fence_to_handle(fence);
3301
3302         return VK_SUCCESS;
3303 }
3304
3305 void radv_DestroyFence(
3306         VkDevice                                    _device,
3307         VkFence                                     _fence,
3308         const VkAllocationCallbacks*                pAllocator)
3309 {
3310         RADV_FROM_HANDLE(radv_device, device, _device);
3311         RADV_FROM_HANDLE(radv_fence, fence, _fence);
3312
3313         if (!fence)
3314                 return;
3315
3316         if (fence->temp_syncobj)
3317                 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3318         if (fence->syncobj)
3319                 device->ws->destroy_syncobj(device->ws, fence->syncobj);
3320         if (fence->fence)
3321                 device->ws->destroy_fence(fence->fence);
3322         if (fence->fence_wsi)
3323                 fence->fence_wsi->destroy(fence->fence_wsi);
3324         vk_free2(&device->alloc, pAllocator, fence);
3325 }
3326
3327
3328 static uint64_t radv_get_current_time()
3329 {
3330         struct timespec tv;
3331         clock_gettime(CLOCK_MONOTONIC, &tv);
3332         return tv.tv_nsec + tv.tv_sec*1000000000ull;
3333 }
3334
/*
 * Turn a relative timeout in nanoseconds into an absolute deadline on the
 * clock used by radv_get_current_time(). The result saturates at UINT64_MAX
 * instead of wrapping around.
 */
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
        const uint64_t now = radv_get_current_time();
        const uint64_t headroom = UINT64_MAX - now;

        /* Clamp so that now + timeout cannot overflow. */
        if (headroom < timeout)
                timeout = headroom;

        return now + timeout;
}
3343
3344
3345 static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
3346 {
3347         for (uint32_t i = 0; i < fenceCount; ++i) {
3348                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3349                 if (fence->fence == NULL || fence->syncobj ||
3350                     fence->temp_syncobj ||
3351                     (!fence->signalled && !fence->submitted))
3352                         return false;
3353         }
3354         return true;
3355 }
3356
3357 static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
3358 {
3359         for (uint32_t i = 0; i < fenceCount; ++i) {
3360                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3361                 if (fence->syncobj == 0 && fence->temp_syncobj == 0)
3362                         return false;
3363         }
3364         return true;
3365 }
3366
3367 VkResult radv_WaitForFences(
3368         VkDevice                                    _device,
3369         uint32_t                                    fenceCount,
3370         const VkFence*                              pFences,
3371         VkBool32                                    waitAll,
3372         uint64_t                                    timeout)
3373 {
3374         RADV_FROM_HANDLE(radv_device, device, _device);
3375         timeout = radv_get_absolute_timeout(timeout);
3376
3377         if (device->always_use_syncobj &&
3378             radv_all_fences_syncobj(fenceCount, pFences))
3379         {
3380                 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3381                 if (!handles)
3382                         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3383
3384                 for (uint32_t i = 0; i < fenceCount; ++i) {
3385                         RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3386                         handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3387                 }
3388
3389                 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3390
3391                 free(handles);
3392                 return success ? VK_SUCCESS : VK_TIMEOUT;
3393         }
3394
3395         if (!waitAll && fenceCount > 1) {
3396                 /* Not doing this by default for waitAll, due to needing to allocate twice. */
3397                 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
3398                         uint32_t wait_count = 0;
3399                         struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
3400                         if (!fences)
3401                                 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3402
3403                         for (uint32_t i = 0; i < fenceCount; ++i) {
3404                                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3405
3406                                 if (fence->signalled) {
3407                                         free(fences);
3408                                         return VK_SUCCESS;
3409                                 }
3410
3411                                 fences[wait_count++] = fence->fence;
3412                         }
3413
3414                         bool success = device->ws->fences_wait(device->ws, fences, wait_count,
3415                                                                waitAll, timeout - radv_get_current_time());
3416
3417                         free(fences);
3418                         return success ? VK_SUCCESS : VK_TIMEOUT;
3419                 }
3420
3421                 while(radv_get_current_time() <= timeout) {
3422                         for (uint32_t i = 0; i < fenceCount; ++i) {
3423                                 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
3424                                         return VK_SUCCESS;
3425                         }
3426                 }
3427                 return VK_TIMEOUT;
3428         }
3429
3430         for (uint32_t i = 0; i < fenceCount; ++i) {
3431                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3432                 bool expired = false;
3433
3434                 if (fence->temp_syncobj) {
3435                         if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
3436                                 return VK_TIMEOUT;
3437                         continue;
3438                 }
3439
3440                 if (fence->syncobj) {
3441                         if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
3442                                 return VK_TIMEOUT;
3443                         continue;
3444                 }
3445
3446                 if (fence->signalled)
3447                         continue;
3448
3449                 if (fence->fence) {
3450                         if (!fence->submitted) {
3451                                 while(radv_get_current_time() <= timeout &&
3452                                       !fence->submitted)
3453                                         /* Do nothing */;
3454
3455                                 if (!fence->submitted)
3456                                         return VK_TIMEOUT;
3457
3458                                 /* Recheck as it may have been set by
3459                                  * submitting operations. */
3460
3461                                 if (fence->signalled)
3462                                         continue;
3463                         }
3464
3465                         expired = device->ws->fence_wait(device->ws,
3466                                                          fence->fence,
3467                                                          true, timeout);
3468                         if (!expired)
3469                                 return VK_TIMEOUT;
3470                 }
3471
3472                 if (fence->fence_wsi) {
3473                         VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout);
3474                         if (result != VK_SUCCESS)
3475                                 return result;
3476                 }
3477
3478                 fence->signalled = true;
3479         }
3480
3481         return VK_SUCCESS;
3482 }
3483
3484 VkResult radv_ResetFences(VkDevice _device,
3485                           uint32_t fenceCount,
3486                           const VkFence *pFences)
3487 {
3488         RADV_FROM_HANDLE(radv_device, device, _device);
3489
3490         for (unsigned i = 0; i < fenceCount; ++i) {
3491                 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3492                 fence->submitted = fence->signalled = false;
3493
3494                 /* Per spec, we first restore the permanent payload, and then reset, so
3495                  * having a temp syncobj should not skip resetting the permanent syncobj. */
3496                 if (fence->temp_syncobj) {
3497                         device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3498                         fence->temp_syncobj = 0;
3499                 }
3500
3501                 if (fence->syncobj) {
3502                         device->ws->reset_syncobj(device->ws, fence->syncobj);
3503                 }
3504         }
3505
3506         return VK_SUCCESS;
3507 }
3508
3509 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
3510 {
3511         RADV_FROM_HANDLE(radv_device, device, _device);
3512         RADV_FROM_HANDLE(radv_fence, fence, _fence);
3513
3514         if (fence->temp_syncobj) {
3515                         bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
3516                         return success ? VK_SUCCESS : VK_NOT_READY;
3517         }
3518
3519         if (fence->syncobj) {
3520                         bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
3521                         return success ? VK_SUCCESS : VK_NOT_READY;
3522         }
3523
3524         if (fence->signalled)
3525                 return VK_SUCCESS;
3526         if (!fence->submitted)
3527                 return VK_NOT_READY;
3528         if (fence->fence) {
3529                 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3530                         return VK_NOT_READY;
3531         }
3532         if (fence->fence_wsi) {
3533                 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0);
3534
3535                 if (result != VK_SUCCESS) {
3536                         if (result == VK_TIMEOUT)
3537                                 return VK_NOT_READY;
3538                         return result;
3539                 }
3540         }
3541         return VK_SUCCESS;
3542 }
3543
3544
3545 // Queue semaphore functions
3546
3547 VkResult radv_CreateSemaphore(
3548         VkDevice                                    _device,
3549         const VkSemaphoreCreateInfo*                pCreateInfo,
3550         const VkAllocationCallbacks*                pAllocator,
3551         VkSemaphore*                                pSemaphore)
3552 {
3553         RADV_FROM_HANDLE(radv_device, device, _device);
3554         const VkExportSemaphoreCreateInfoKHR *export =
3555                 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
3556         VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
3557                 export ? export->handleTypes : 0;
3558
3559         struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3560                                                sizeof(*sem), 8,
3561                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3562         if (!sem)
3563                 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3564
3565         sem->temp_syncobj = 0;
3566         /* create a syncobject if we are going to export this semaphore */
3567         if (device->always_use_syncobj || handleTypes) {
3568                 assert (device->physical_device->rad_info.has_syncobj);
3569                 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3570                 if (ret) {
3571                         vk_free2(&device->alloc, pAllocator, sem);
3572                         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3573                 }
3574                 sem->sem = NULL;
3575         } else {
3576                 sem->sem = device->ws->create_sem(device->ws);
3577                 if (!sem->sem) {
3578                         vk_free2(&device->alloc, pAllocator, sem);
3579                         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3580                 }
3581                 sem->syncobj = 0;
3582         }
3583
3584         *pSemaphore = radv_semaphore_to_handle(sem);
3585         return VK_SUCCESS;
3586 }
3587
3588 void radv_DestroySemaphore(
3589         VkDevice                                    _device,
3590         VkSemaphore                                 _semaphore,
3591         const VkAllocationCallbacks*                pAllocator)
3592 {
3593         RADV_FROM_HANDLE(radv_device, device, _device);
3594         RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
3595         if (!_semaphore)
3596                 return;
3597
3598         if (sem->syncobj)
3599                 device->ws->destroy_syncobj(device->ws, sem->syncobj);
3600         else
3601                 device->ws->destroy_sem(sem->sem);
3602         vk_free2(&device->alloc, pAllocator, sem);
3603 }
3604
3605 VkResult radv_CreateEvent(
3606         VkDevice                                    _device,
3607         const VkEventCreateInfo*                    pCreateInfo,
3608         const VkAllocationCallbacks*                pAllocator,
3609         VkEvent*                                    pEvent)
3610 {
3611         RADV_FROM_HANDLE(radv_device, device, _device);
3612         struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
3613                                                sizeof(*event), 8,
3614                                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3615
3616         if (!event)
3617                 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3618
3619         event->bo = device->ws->buffer_create(device->ws, 8, 8,
3620                                               RADEON_DOMAIN_GTT,
3621                                               RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
3622         if (!event->bo) {
3623                 vk_free2(&device->alloc, pAllocator, event);
3624                 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3625         }
3626
3627         event->map = (uint64_t*)device->ws->buffer_map(event->bo);
3628
3629         *pEvent = radv_event_to_handle(event);
3630
3631         return VK_SUCCESS;
3632 }
3633
3634 void radv_DestroyEvent(
3635         VkDevice                                    _device,
3636         VkEvent                                     _event,
3637         const VkAllocationCallbacks*                pAllocator)
3638 {
3639         RADV_FROM_HANDLE(radv_device, device, _device);
3640         RADV_FROM_HANDLE(radv_event, event, _event);
3641
3642         if (!event)
3643                 return;
3644         device->ws->buffer_destroy(event->bo);
3645         vk_free2(&device->alloc, pAllocator, event);
3646 }
3647
3648 VkResult radv_GetEventStatus(
3649         VkDevice                                    _device,
3650         VkEvent                                     _event)
3651 {
3652         RADV_FROM_HANDLE(radv_event, event, _event);
3653
3654         if (*event->map == 1)
3655                 return VK_EVENT_SET;
3656         return VK_EVENT_RESET;
3657 }
3658
3659 VkResult radv_SetEvent(
3660         VkDevice                                    _device,
3661         VkEvent                                     _event)
3662 {
3663         RADV_FROM_HANDLE(radv_event, event, _event);
3664         *event->map = 1;
3665
3666         return VK_SUCCESS;
3667 }
3668
3669 VkResult radv_ResetEvent(
3670     VkDevice                                    _device,
3671     VkEvent                                     _event)
3672 {
3673         RADV_FROM_HANDLE(radv_event, event, _event);
3674         *event->map = 0;
3675
3676         return VK_SUCCESS;
3677 }
3678
3679 VkResult radv_CreateBuffer(
3680         VkDevice                                    _device,
3681         const VkBufferCreateInfo*                   pCreateInfo,
3682         const VkAllocationCallbacks*                pAllocator,
3683         VkBuffer*                                   pBuffer)
3684 {
3685         RADV_FROM_HANDLE(radv_device, device, _device);
3686         struct radv_buffer *buffer;
3687
3688         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
3689
3690         buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
3691                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3692         if (buffer == NULL)
3693                 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3694
3695         buffer->size = pCreateInfo->size;
3696         buffer->usage = pCreateInfo->usage;
3697         buffer->bo = NULL;
3698         buffer->offset = 0;
3699         buffer->flags = pCreateInfo->flags;
3700
3701         buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
3702                                                  EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
3703
3704         if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
3705                 buffer->bo = device->ws->buffer_create(device->ws,
3706                                                        align64(buffer->size, 4096),
3707                                                        4096, 0, RADEON_FLAG_VIRTUAL);
3708                 if (!buffer->bo) {
3709                         vk_free2(&device->alloc, pAllocator, buffer);
3710                         return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3711                 }
3712         }
3713
3714         *pBuffer = radv_buffer_to_handle(buffer);
3715
3716         return VK_SUCCESS;
3717 }
3718
3719 void radv_DestroyBuffer(
3720         VkDevice                                    _device,
3721         VkBuffer                                    _buffer,
3722         const VkAllocationCallbacks*                pAllocator)
3723 {
3724         RADV_FROM_HANDLE(radv_device, device, _device);
3725         RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3726
3727         if (!buffer)
3728                 return;
3729
3730         if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3731                 device->ws->buffer_destroy(buffer->bo);
3732
3733         vk_free2(&device->alloc, pAllocator, buffer);
3734 }
3735
3736 static inline unsigned
3737 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3738 {
3739         if (stencil)
3740                 return image->surface.u.legacy.stencil_tiling_index[level];
3741         else
3742                 return image->surface.u.legacy.tiling_index[level];
3743 }
3744
3745 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
3746 {
3747         return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
3748 }
3749
3750 static uint32_t
3751 radv_init_dcc_control_reg(struct radv_device *device,
3752                           struct radv_image_view *iview)
3753 {
3754         unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3755         unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
3756         unsigned max_compressed_block_size;
3757         unsigned independent_64b_blocks;
3758
3759         if (device->physical_device->rad_info.chip_class < VI)
3760                 return 0;
3761
3762         if (iview->image->info.samples > 1) {
3763                 if (iview->image->surface.bpe == 1)
3764                         max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3765                 else if (iview->image->surface.bpe == 2)
3766                         max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
3767         }
3768
3769         if (!device->physical_device->rad_info.has_dedicated_vram) {
3770                 /* amdvlk: [min-compressed-block-size] should be set to 32 for
3771                  * dGPU and 64 for APU because all of our APUs to date use
3772                  * DIMMs which have a request granularity size of 64B while all
3773                  * other chips have a 32B request size.
3774                  */
3775                 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
3776         }
3777
3778         if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
3779                                    VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3780                                    VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
3781                 /* If this DCC image is potentially going to be used in texture
3782                  * fetches, we need some special settings.
3783                  */
3784                 independent_64b_blocks = 1;
3785                 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3786         } else {
3787                 /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
3788                  * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
3789                  * big as possible for better compression state.
3790                  */
3791                 independent_64b_blocks = 0;
3792                 max_compressed_block_size = max_uncompressed_block_size;
3793         }
3794
3795         return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3796                S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
3797                S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
3798                S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
3799 }
3800
3801 static void
3802 radv_initialise_color_surface(struct radv_device *device,
3803                               struct radv_color_buffer_info *cb,
3804                               struct radv_image_view *iview)
3805 {
3806         const struct vk_format_description *desc;
3807         unsigned ntype, format, swap, endian;
3808         unsigned blend_clamp = 0, blend_bypass = 0;
3809         uint64_t va;
3810         const struct radeon_surf *surf = &iview->image->surface;
3811
3812         desc = vk_format_description(iview->vk_format);
3813
3814         memset(cb, 0, sizeof(*cb));
3815
3816         /* Intensity is implemented as Red, so treat it that way. */
3817         cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3818
3819         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3820
3821         cb->cb_color_base = va >> 8;
3822
3823         if (device->physical_device->rad_info.chip_class >= GFX9) {
3824                 struct gfx9_surf_meta_flags meta;
3825                 if (iview->image->dcc_offset)
3826                         meta = iview->image->surface.u.gfx9.dcc;
3827                 else
3828                         meta = iview->image->surface.u.gfx9.cmask;
3829
3830                 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3831                         S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
3832                         S_028C74_RB_ALIGNED(meta.rb_aligned) |
3833                         S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3834
3835                 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
3836                 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3837         } else {
3838                 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
3839                 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3840
3841                 cb->cb_color_base += level_info->offset >> 8;
3842                 if (level_info->mode == RADEON_SURF_MODE_2D)
3843                         cb->cb_color_base |= iview->image->surface.tile_swizzle;
3844
3845                 pitch_tile_max = level_info->nblk_x / 8 - 1;
3846                 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
3847                 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
3848
3849                 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3850                 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3851                 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
3852
3853                 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3854
3855                 if (radv_image_has_fmask(iview->image)) {
3856                         if (device->physical_device->rad_info.chip_class >= CIK)
3857                                 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
3858                         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
3859                         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
3860                 } else {
3861                         /* This must be set for fast clear to work without FMASK. */
3862                         if (device->physical_device->rad_info.chip_class >= CIK)
3863                                 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3864                         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3865                         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3866                 }
3867         }
3868
3869         /* CMASK variables */
3870         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3871         va += iview->image->cmask.offset;
3872         cb->cb_color_cmask = va >> 8;
3873
3874         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3875         va += iview->image->dcc_offset;
3876         cb->cb_dcc_base = va >> 8;
3877         cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
3878
3879         uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3880         cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
3881                 S_028C6C_SLICE_MAX(max_slice);
3882
3883         if (iview->image->info.samples > 1) {
3884                 unsigned log_samples = util_logbase2(iview->image->info.samples);
3885
3886                 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
3887                         S_028C74_NUM_FRAGMENTS(log_samples);
3888         }
3889
3890         if (radv_image_has_fmask(iview->image)) {
3891                 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
3892                 cb->cb_color_fmask = va >> 8;
3893                 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
3894         } else {
3895                 cb->cb_color_fmask = cb->cb_color_base;
3896         }
3897
3898         ntype = radv_translate_color_numformat(iview->vk_format,
3899                                                desc,
3900                                                vk_format_get_first_non_void_channel(iview->vk_format));
3901         format = radv_translate_colorformat(iview->vk_format);
3902         if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3903                 radv_finishme("Illegal color\n");
3904         swap = radv_translate_colorswap(iview->vk_format, FALSE);
3905         endian = radv_colorformat_endian_swap(format);
3906
3907         /* blend clamp should be set for all NORM/SRGB types */
3908         if (ntype == V_028C70_NUMBER_UNORM ||
3909             ntype == V_028C70_NUMBER_SNORM ||
3910             ntype == V_028C70_NUMBER_SRGB)
3911                 blend_clamp = 1;
3912
3913         /* set blend bypass according to docs if SINT/UINT or
3914            8/24 COLOR variants */
3915         if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3916             format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3917             format == V_028C70_COLOR_X24_8_32_FLOAT) {
3918                 blend_clamp = 0;
3919                 blend_bypass = 1;
3920         }
3921 #if 0
3922         if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3923             (format == V_028C70_COLOR_8 ||
3924              format == V_028C70_COLOR_8_8 ||
3925              format == V_028C70_COLOR_8_8_8_8))
3926                 ->color_is_int8 = true;
3927 #endif
3928         cb->cb_color_info = S_028C70_FORMAT(format) |
3929                 S_028C70_COMP_SWAP(swap) |
3930                 S_028C70_BLEND_CLAMP(blend_clamp) |
3931                 S_028C70_BLEND_BYPASS(blend_bypass) |
3932                 S_028C70_SIMPLE_FLOAT(1) |
3933                 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3934                                     ntype != V_028C70_NUMBER_SNORM &&
3935                                     ntype != V_028C70_NUMBER_SRGB &&
3936                                     format != V_028C70_COLOR_8_24 &&
3937                                     format != V_028C70_COLOR_24_8) |
3938                 S_028C70_NUMBER_TYPE(ntype) |
3939                 S_028C70_ENDIAN(endian);
3940         if (radv_image_has_fmask(iview->image)) {
3941                 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3942                 if (device->physical_device->rad_info.chip_class == SI) {
3943                         unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3944                         cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3945                 }
3946         }
3947
3948         if (radv_image_has_cmask(iview->image) &&
3949             !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3950                 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3951
3952         if (radv_dcc_enabled(iview->image, iview->base_mip))
3953                 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3954
3955         cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
3956
3957         /* This must be set for fast clear to work without FMASK. */
3958         if (!radv_image_has_fmask(iview->image) &&
3959             device->physical_device->rad_info.chip_class == SI) {
3960                 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3961                 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3962         }
3963
3964         if (device->physical_device->rad_info.chip_class >= GFX9) {
3965                 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3966                   (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3967
3968                 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3969                 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3970                         S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3971                 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3972                         S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3973                         S_028C68_MAX_MIP(iview->image->info.levels - 1);
3974         }
3975 }
3976
3977 static unsigned
3978 radv_calc_decompress_on_z_planes(struct radv_device *device,
3979                                  struct radv_image_view *iview)
3980 {
3981         unsigned max_zplanes = 0;
3982
3983         assert(radv_image_is_tc_compat_htile(iview->image));
3984
3985         if (device->physical_device->rad_info.chip_class >= GFX9) {
3986                 /* Default value for 32-bit depth surfaces. */
3987                 max_zplanes = 4;
3988
3989                 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3990                     iview->image->info.samples > 1)
3991                         max_zplanes = 2;
3992
3993                 max_zplanes = max_zplanes + 1;
3994         } else {
3995                 if (iview->vk_format == VK_FORMAT_D16_UNORM) {
3996                         /* Do not enable Z plane compression for 16-bit depth
3997                          * surfaces because isn't supported on GFX8. Only
3998                          * 32-bit depth surfaces are supported by the hardware.
3999                          * This allows to maintain shader compatibility and to
4000                          * reduce the number of depth decompressions.
4001                          */
4002                         max_zplanes = 1;
4003                 } else {
4004                         if (iview->image->info.samples <= 1)
4005                                 max_zplanes = 5;
4006                         else if (iview->image->info.samples <= 4)
4007                                 max_zplanes = 3;
4008                         else
4009                                 max_zplanes = 2;
4010                 }
4011         }
4012
4013         return max_zplanes;
4014 }
4015
4016 static void
4017 radv_initialise_ds_surface(struct radv_device *device,
4018                            struct radv_ds_buffer_info *ds,
4019                            struct radv_image_view *iview)
4020 {
4021         unsigned level = iview->base_mip;
4022         unsigned format, stencil_format;
4023         uint64_t va, s_offs, z_offs;
4024         bool stencil_only = false;
4025         memset(ds, 0, sizeof(*ds));
4026         switch (iview->image->vk_format) {
4027         case VK_FORMAT_D24_UNORM_S8_UINT:
4028         case VK_FORMAT_X8_D24_UNORM_PACK32:
4029                 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
4030                 ds->offset_scale = 2.0f;
4031                 break;
4032         case VK_FORMAT_D16_UNORM:
4033         case VK_FORMAT_D16_UNORM_S8_UINT:
4034                 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
4035                 ds->offset_scale = 4.0f;
4036                 break;
4037         case VK_FORMAT_D32_SFLOAT:
4038         case VK_FORMAT_D32_SFLOAT_S8_UINT:
4039                 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
4040                         S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
4041                 ds->offset_scale = 1.0f;
4042                 break;
4043         case VK_FORMAT_S8_UINT:
4044                 stencil_only = true;
4045                 break;
4046         default:
4047                 break;
4048         }
4049
4050         format = radv_translate_dbformat(iview->image->vk_format);
4051         stencil_format = iview->image->surface.has_stencil ?
4052                 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
4053
4054         uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4055         ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
4056                 S_028008_SLICE_MAX(max_slice);
4057
4058         ds->db_htile_data_base = 0;
4059         ds->db_htile_surface = 0;
4060
4061         va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4062         s_offs = z_offs = va;
4063
4064         if (device->physical_device->rad_info.chip_class >= GFX9) {
4065                 assert(iview->image->surface.u.gfx9.surf_offset == 0);
4066                 s_offs += iview->image->surface.u.gfx9.stencil_offset;
4067
4068                 ds->db_z_info = S_028038_FORMAT(format) |
4069                         S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
4070                         S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
4071                         S_028038_MAXMIP(iview->image->info.levels - 1) |
4072                         S_028038_ZRANGE_PRECISION(1);
4073                 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
4074                         S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
4075
4076                 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
4077                 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
4078                 ds->db_depth_view |= S_028008_MIPID(level);
4079
4080                 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
4081                         S_02801C_Y_MAX(iview->image->info.height - 1);
4082
4083                 if (radv_htile_enabled(iview->image, level)) {
4084                         ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
4085
4086                         if (radv_image_is_tc_compat_htile(iview->image)) {
4087                                 unsigned max_zplanes =
4088                                         radv_calc_decompress_on_z_planes(device, iview);
4089
4090                                 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
4091                                                  S_028038_ITERATE_FLUSH(1);
4092                                 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
4093                         }
4094
4095                         if (!iview->image->surface.has_stencil)
4096                                 /* Use all of the htile_buffer for depth if there's no stencil. */
4097                                 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
4098                         va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4099                                 iview->image->htile_offset;
4100                         ds->db_htile_data_base = va >> 8;
4101                         ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
4102                                 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
4103                                 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
4104                 }
4105         } else {
4106                 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
4107
4108                 if (stencil_only)
4109                         level_info = &iview->image->surface.u.legacy.stencil_level[level];
4110
4111                 z_offs += iview->image->surface.u.legacy.level[level].offset;
4112                 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
4113
4114                 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
4115                 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
4116                 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
4117
4118                 if (iview->image->info.samples > 1)
4119                         ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
4120
4121                 if (device->physical_device->rad_info.chip_class >= CIK) {
4122                         struct radeon_info *info = &device->physical_device->rad_info;
4123                         unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
4124                         unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
4125                         unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
4126                         unsigned tile_mode = info->si_tile_mode_array[tiling_index];
4127                         unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
4128                         unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
4129
4130                         if (stencil_only)
4131                                 tile_mode = stencil_tile_mode;
4132
4133                         ds->db_depth_info |=
4134                                 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
4135                                 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
4136                                 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
4137                                 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
4138                                 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
4139                                 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
4140                         ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
4141                         ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
4142                 } else {
4143                         unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
4144                         ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4145                         tile_mode_index = si_tile_mode_index(iview->image, level, true);
4146                         ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
4147                         if (stencil_only)
4148                                 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4149                 }
4150
4151                 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
4152                         S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
4153                 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
4154
4155                 if (radv_htile_enabled(iview->image, level)) {
4156                         ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
4157
4158                         if (!iview->image->surface.has_stencil &&
4159                             !radv_image_is_tc_compat_htile(iview->image))
4160                                 /* Use all of the htile_buffer for depth if there's no stencil. */
4161                                 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
4162
4163                         va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4164                                 iview->image->htile_offset;
4165                         ds->db_htile_data_base = va >> 8;
4166                         ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
4167
4168                         if (radv_image_is_tc_compat_htile(iview->image)) {
4169                                 unsigned max_zplanes =
4170                                         radv_calc_decompress_on_z_planes(device, iview);
4171
4172                                 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
4173                                 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
4174                         }
4175                 }
4176         }
4177
4178         ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
4179         ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
4180 }
4181
4182 VkResult radv_CreateFramebuffer(
4183         VkDevice                                    _device,
4184         const VkFramebufferCreateInfo*              pCreateInfo,
4185         const VkAllocationCallbacks*                pAllocator,
4186         VkFramebuffer*                              pFramebuffer)
4187 {
4188         RADV_FROM_HANDLE(radv_device, device, _device);
4189         struct radv_framebuffer *framebuffer;
4190
4191         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
4192
4193         size_t size = sizeof(*framebuffer) +
4194                 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
4195         framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
4196                                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4197         if (framebuffer == NULL)
4198                 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4199
4200         framebuffer->attachment_count = pCreateInfo->attachmentCount;
4201         framebuffer->width = pCreateInfo->width;
4202         framebuffer->height = pCreateInfo->height;
4203         framebuffer->layers = pCreateInfo->layers;
4204         for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
4205                 VkImageView _iview = pCreateInfo->pAttachments[i];
4206                 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
4207                 framebuffer->attachments[i].attachment = iview;
4208                 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
4209                         radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
4210                 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
4211                         radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
4212                 }
4213                 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
4214                 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
4215                 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
4216         }
4217
4218         *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
4219         return VK_SUCCESS;
4220 }
4221
4222 void radv_DestroyFramebuffer(
4223         VkDevice                                    _device,
4224         VkFramebuffer                               _fb,
4225         const VkAllocationCallbacks*                pAllocator)
4226 {
4227         RADV_FROM_HANDLE(radv_device, device, _device);
4228         RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
4229
4230         if (!fb)
4231                 return;
4232         vk_free2(&device->alloc, pAllocator, fb);
4233 }
4234
4235 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
4236 {
4237         switch (address_mode) {
4238         case VK_SAMPLER_ADDRESS_MODE_REPEAT:
4239                 return V_008F30_SQ_TEX_WRAP;
4240         case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
4241                 return V_008F30_SQ_TEX_MIRROR;
4242         case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
4243                 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
4244         case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
4245                 return V_008F30_SQ_TEX_CLAMP_BORDER;
4246         case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
4247                 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
4248         default:
4249                 unreachable("illegal tex wrap mode");
4250                 break;
4251         }
4252 }
4253
4254 static unsigned
4255 radv_tex_compare(VkCompareOp op)
4256 {
4257         switch (op) {
4258         case VK_COMPARE_OP_NEVER:
4259                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
4260         case VK_COMPARE_OP_LESS:
4261                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
4262         case VK_COMPARE_OP_EQUAL:
4263                 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
4264         case VK_COMPARE_OP_LESS_OR_EQUAL:
4265                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
4266         case VK_COMPARE_OP_GREATER:
4267                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
4268         case VK_COMPARE_OP_NOT_EQUAL:
4269                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
4270         case VK_COMPARE_OP_GREATER_OR_EQUAL:
4271                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
4272         case VK_COMPARE_OP_ALWAYS:
4273                 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
4274         default:
4275                 unreachable("illegal compare mode");
4276                 break;
4277         }
4278 }
4279
4280 static unsigned
4281 radv_tex_filter(VkFilter filter, unsigned max_ansio)
4282 {
4283         switch (filter) {
4284         case VK_FILTER_NEAREST:
4285                 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
4286                         V_008F38_SQ_TEX_XY_FILTER_POINT);
4287         case VK_FILTER_LINEAR:
4288                 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
4289                         V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
4290         case VK_FILTER_CUBIC_IMG:
4291         default:
4292                 fprintf(stderr, "illegal texture filter");
4293                 return 0;
4294         }
4295 }
4296
4297 static unsigned
4298 radv_tex_mipfilter(VkSamplerMipmapMode mode)
4299 {
4300         switch (mode) {
4301         case VK_SAMPLER_MIPMAP_MODE_NEAREST:
4302                 return V_008F38_SQ_TEX_Z_FILTER_POINT;
4303         case VK_SAMPLER_MIPMAP_MODE_LINEAR:
4304                 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
4305         default:
4306                 return V_008F38_SQ_TEX_Z_FILTER_NONE;
4307         }
4308 }
4309
4310 static unsigned
4311 radv_tex_bordercolor(VkBorderColor bcolor)
4312 {
4313         switch (bcolor) {
4314         case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
4315         case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
4316                 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
4317         case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
4318         case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
4319                 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
4320         case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
4321         case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
4322                 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
4323         default:
4324                 break;
4325         }
4326         return 0;
4327 }
4328
/*
 * Map a maximum anisotropy value onto a ratio index in the range 0..4:
 * <2 -> 0, <4 -> 1, <8 -> 2, <16 -> 3, everything else -> 4.
 */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
        unsigned ratio = 0;

        /* Step up one index per power-of-two threshold (2, 4, 8, 16). */
        while (ratio < 4 && filter >= (2u << ratio))
                ratio++;

        return ratio;
}
4342
4343 static unsigned
4344 radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
4345 {
4346         switch (mode) {
4347         case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
4348                 return SQ_IMG_FILTER_MODE_BLEND;
4349         case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
4350                 return SQ_IMG_FILTER_MODE_MIN;
4351         case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
4352                 return SQ_IMG_FILTER_MODE_MAX;
4353         default:
4354                 break;
4355         }
4356         return 0;
4357 }
4358
4359 static void
4360 radv_init_sampler(struct radv_device *device,
4361                   struct radv_sampler *sampler,
4362                   const VkSamplerCreateInfo *pCreateInfo)
4363 {
4364         uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
4365                                         (uint32_t) pCreateInfo->maxAnisotropy : 0;
4366         uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
4367         bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
4368         unsigned filter_mode = SQ_IMG_FILTER_MODE_BLEND;
4369
4370         const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
4371                 vk_find_struct_const(pCreateInfo->pNext,
4372                                      SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT);
4373         if (sampler_reduction)
4374                 filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
4375
4376         sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
4377                              S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
4378                              S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
4379                              S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
4380                              S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
4381                              S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
4382                              S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
4383                              S_008F30_ANISO_BIAS(max_aniso_ratio) |
4384                              S_008F30_DISABLE_CUBE_WRAP(0) |
4385                              S_008F30_COMPAT_MODE(is_vi) |
4386                              S_008F30_FILTER_MODE(filter_mode));
4387         sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
4388                              S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
4389                              S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
4390         sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
4391                              S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
4392                              S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
4393                              S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
4394                              S_008F38_MIP_POINT_PRECLAMP(0) |
4395                              S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
4396                              S_008F38_FILTER_PREC_FIX(1) |
4397                              S_008F38_ANISO_OVERRIDE(is_vi));
4398         sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
4399                              S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
4400 }
4401
4402 VkResult radv_CreateSampler(
4403         VkDevice                                    _device,
4404         const VkSamplerCreateInfo*                  pCreateInfo,
4405         const VkAllocationCallbacks*                pAllocator,
4406         VkSampler*                                  pSampler)
4407 {
4408         RADV_FROM_HANDLE(radv_device, device, _device);
4409         struct radv_sampler *sampler;
4410
4411         assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
4412
4413         sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
4414                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4415         if (!sampler)
4416                 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4417
4418         radv_init_sampler(device, sampler, pCreateInfo);
4419         *pSampler = radv_sampler_to_handle(sampler);
4420
4421         return VK_SUCCESS;
4422 }
4423
4424 void radv_DestroySampler(
4425         VkDevice                                    _device,
4426         VkSampler                                   _sampler,
4427         const VkAllocationCallbacks*                pAllocator)
4428 {
4429         RADV_FROM_HANDLE(radv_device, device, _device);
4430         RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
4431
4432         if (!sampler)
4433                 return;
4434         vk_free2(&device->alloc, pAllocator, sampler);
4435 }
4436
4437 /* vk_icd.h does not declare this function, so we declare it here to
4438  * suppress Wmissing-prototypes.
4439  */
4440 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4441 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
4442
4443 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4444 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
4445 {
4446         /* For the full details on loader interface versioning, see
4447         * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
4448         * What follows is a condensed summary, to help you navigate the large and
4449         * confusing official doc.
4450         *
4451         *   - Loader interface v0 is incompatible with later versions. We don't
4452         *     support it.
4453         *
4454         *   - In loader interface v1:
4455         *       - The first ICD entrypoint called by the loader is
4456         *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
4457         *         entrypoint.
4458         *       - The ICD must statically expose no other Vulkan symbol unless it is
4459         *         linked with -Bsymbolic.
4460         *       - Each dispatchable Vulkan handle created by the ICD must be
4461         *         a pointer to a struct whose first member is VK_LOADER_DATA. The
4462         *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
4463         *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
4464         *         vkDestroySurfaceKHR(). The ICD must be capable of working with
4465         *         such loader-managed surfaces.
4466         *
4467         *    - Loader interface v2 differs from v1 in:
4468         *       - The first ICD entrypoint called by the loader is
4469         *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4470         *         statically expose this entrypoint.
4471         *
4472         *    - Loader interface v3 differs from v2 in:
4473         *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
4474         *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
4475         *          because the loader no longer does so.
4476         */
4477         *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
4478         return VK_SUCCESS;
4479 }
4480
4481 VkResult radv_GetMemoryFdKHR(VkDevice _device,
4482                              const VkMemoryGetFdInfoKHR *pGetFdInfo,
4483                              int *pFD)
4484 {
4485         RADV_FROM_HANDLE(radv_device, device, _device);
4486         RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
4487
4488         assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4489
4490         /* At the moment, we support only the below handle types. */
4491         assert(pGetFdInfo->handleType ==
4492                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4493                pGetFdInfo->handleType ==
4494                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4495
4496         bool ret = radv_get_memory_fd(device, memory, pFD);
4497         if (ret == false)
4498                 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4499         return VK_SUCCESS;
4500 }
4501
4502 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
4503                                        VkExternalMemoryHandleTypeFlagBitsKHR handleType,
4504                                        int fd,
4505                                        VkMemoryFdPropertiesKHR *pMemoryFdProperties)
4506 {
4507    RADV_FROM_HANDLE(radv_device, device, _device);
4508
4509    switch (handleType) {
4510    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4511       pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
4512       return VK_SUCCESS;
4513
4514    default:
4515       /* The valid usage section for this function says:
4516        *
4517        *    "handleType must not be one of the handle types defined as
4518        *    opaque."
4519        *
4520        * So opaque handle types fall into the default "unsupported" case.
4521        */
4522       return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4523    }
4524 }
4525
4526 static VkResult radv_import_opaque_fd(struct radv_device *device,
4527                                       int fd,
4528                                       uint32_t *syncobj)
4529 {
4530         uint32_t syncobj_handle = 0;
4531         int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
4532         if (ret != 0)
4533                 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4534
4535         if (*syncobj)
4536                 device->ws->destroy_syncobj(device->ws, *syncobj);
4537
4538         *syncobj = syncobj_handle;
4539         close(fd);
4540
4541         return VK_SUCCESS;
4542 }
4543
4544 static VkResult radv_import_sync_fd(struct radv_device *device,
4545                                     int fd,
4546                                     uint32_t *syncobj)
4547 {
4548         /* If we create a syncobj we do it locally so that if we have an error, we don't
4549          * leave a syncobj in an undetermined state in the fence. */
4550         uint32_t syncobj_handle =  *syncobj;
4551         if (!syncobj_handle) {
4552                 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
4553                 if (ret) {
4554                         return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4555                 }
4556         }
4557
4558         if (fd == -1) {
4559                 device->ws->signal_syncobj(device->ws, syncobj_handle);
4560         } else {
4561                 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
4562         if (ret != 0)
4563                 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4564         }
4565
4566         *syncobj = syncobj_handle;
4567         if (fd != -1)
4568                 close(fd);
4569
4570         return VK_SUCCESS;
4571 }
4572
4573 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
4574                                    const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
4575 {
4576         RADV_FROM_HANDLE(radv_device, device, _device);
4577         RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
4578         uint32_t *syncobj_dst = NULL;
4579
4580         if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
4581                 syncobj_dst = &sem->temp_syncobj;
4582         } else {
4583                 syncobj_dst = &sem->syncobj;
4584         }
4585
4586         switch(pImportSemaphoreFdInfo->handleType) {
4587                 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4588                         return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4589                 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4590                         return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4591                 default:
4592                         unreachable("Unhandled semaphore handle type");
4593         }
4594 }
4595
4596 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
4597                                 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
4598                                 int *pFd)
4599 {
4600         RADV_FROM_HANDLE(radv_device, device, _device);
4601         RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
4602         int ret;
4603         uint32_t syncobj_handle;
4604
4605         if (sem->temp_syncobj)
4606                 syncobj_handle = sem->temp_syncobj;
4607         else
4608                 syncobj_handle = sem->syncobj;
4609
4610         switch(pGetFdInfo->handleType) {
4611         case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4612                 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4613                 break;
4614         case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4615                 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4616                 if (!ret) {
4617                         if (sem->temp_syncobj) {
4618                                 close (sem->temp_syncobj);
4619                                 sem->temp_syncobj = 0;
4620                         } else {
4621                                 device->ws->reset_syncobj(device->ws, syncobj_handle);
4622                         }
4623                 }
4624                 break;
4625         default:
4626                 unreachable("Unhandled semaphore handle type");
4627         }
4628
4629         if (ret)
4630                 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4631         return VK_SUCCESS;
4632 }
4633
4634 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
4635         VkPhysicalDevice                            physicalDevice,
4636         const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
4637         VkExternalSemaphorePropertiesKHR*           pExternalSemaphoreProperties)
4638 {
4639         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4640
4641         /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
4642         if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4643             (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || 
4644              pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4645                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4646                 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4647                 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4648                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4649         } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
4650                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4651                 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4652                 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4653                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4654         } else {
4655                 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
4656                 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
4657                 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
4658         }
4659 }
4660
4661 VkResult radv_ImportFenceFdKHR(VkDevice _device,
4662                                    const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
4663 {
4664         RADV_FROM_HANDLE(radv_device, device, _device);
4665         RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
4666         uint32_t *syncobj_dst = NULL;
4667
4668
4669         if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
4670                 syncobj_dst = &fence->temp_syncobj;
4671         } else {
4672                 syncobj_dst = &fence->syncobj;
4673         }
4674
4675         switch(pImportFenceFdInfo->handleType) {
4676                 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4677                         return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4678                 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4679                         return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4680                 default:
4681                         unreachable("Unhandled fence handle type");
4682         }
4683 }
4684
4685 VkResult radv_GetFenceFdKHR(VkDevice _device,
4686                                 const VkFenceGetFdInfoKHR *pGetFdInfo,
4687                                 int *pFd)
4688 {
4689         RADV_FROM_HANDLE(radv_device, device, _device);
4690         RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
4691         int ret;
4692         uint32_t syncobj_handle;
4693
4694         if (fence->temp_syncobj)
4695                 syncobj_handle = fence->temp_syncobj;
4696         else
4697                 syncobj_handle = fence->syncobj;
4698
4699         switch(pGetFdInfo->handleType) {
4700         case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4701                 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4702                 break;
4703         case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4704                 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4705                 if (!ret) {
4706                         if (fence->temp_syncobj) {
4707                                 close (fence->temp_syncobj);
4708                                 fence->temp_syncobj = 0;
4709                         } else {
4710                                 device->ws->reset_syncobj(device->ws, syncobj_handle);
4711                         }
4712                 }
4713                 break;
4714         default:
4715                 unreachable("Unhandled fence handle type");
4716         }
4717
4718         if (ret)
4719                 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4720         return VK_SUCCESS;
4721 }
4722
4723 void radv_GetPhysicalDeviceExternalFenceProperties(
4724         VkPhysicalDevice                            physicalDevice,
4725         const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
4726         VkExternalFencePropertiesKHR*           pExternalFenceProperties)
4727 {
4728         RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4729
4730         if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4731             (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || 
4732              pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4733                 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4734                 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4735                 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
4736                         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4737         } else {
4738                 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
4739                 pExternalFenceProperties->compatibleHandleTypes = 0;
4740                 pExternalFenceProperties->externalFenceFeatures = 0;
4741         }
4742 }
4743
4744 VkResult
4745 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
4746                                  const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
4747                                  const VkAllocationCallbacks* pAllocator,
4748                                  VkDebugReportCallbackEXT* pCallback)
4749 {
4750         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4751         return vk_create_debug_report_callback(&instance->debug_report_callbacks,
4752                                                pCreateInfo, pAllocator, &instance->alloc,
4753                                                pCallback);
4754 }
4755
4756 void
4757 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
4758                                   VkDebugReportCallbackEXT _callback,
4759                                   const VkAllocationCallbacks* pAllocator)
4760 {
4761         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4762         vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
4763                                          _callback, pAllocator, &instance->alloc);
4764 }
4765
4766 void
4767 radv_DebugReportMessageEXT(VkInstance _instance,
4768                           VkDebugReportFlagsEXT flags,
4769                           VkDebugReportObjectTypeEXT objectType,
4770                           uint64_t object,
4771                           size_t location,
4772                           int32_t messageCode,
4773                           const char* pLayerPrefix,
4774                           const char* pMessage)
4775 {
4776         RADV_FROM_HANDLE(radv_instance, instance, _instance);
4777         vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
4778                         object, location, messageCode, pLayerPrefix, pMessage);
4779 }
4780
4781 void
4782 radv_GetDeviceGroupPeerMemoryFeatures(
4783     VkDevice                                    device,
4784     uint32_t                                    heapIndex,
4785     uint32_t                                    localDeviceIndex,
4786     uint32_t                                    remoteDeviceIndex,
4787     VkPeerMemoryFeatureFlags*                   pPeerMemoryFeatures)
4788 {
4789         assert(localDeviceIndex == remoteDeviceIndex);
4790
4791         *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
4792                                VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
4793                                VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
4794                                VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
4795 }