src/intel/vulkan/anv_pipeline_cache.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "util/mesa-sha1.h"
  25 #include "util/hash_table.h"
  26 #include "util/debug.h"
  27 #include "anv_private.h"
  28
  29 struct shader_bin_key {
  30    uint32_t size;
  31    uint8_t data[0];
  32 };
  33
  34 static size_t
  35 anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size,
  36                     uint32_t surface_count, uint32_t sampler_count)
  37 {
  38    const uint32_t binding_data_size =
  39       (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);
  40
  41    return align_u32(sizeof(struct anv_shader_bin), 8) +
  42           align_u32(prog_data_size, 8) +
  43           align_u32(sizeof(uint32_t) + key_size, 8) +
  44           align_u32(binding_data_size, 8);
  45 }
  46
  47 static inline const struct shader_bin_key *
  48 anv_shader_bin_get_key(const struct anv_shader_bin *shader)
  49 {
  50    const void *data = shader;
  51    data += align_u32(sizeof(struct anv_shader_bin), 8);
  52    data += align_u32(shader->prog_data_size, 8);
  53    return data;
  54 }
  55
  56 struct anv_shader_bin *
  57 anv_shader_bin_create(struct anv_device *device,
  58                       const void *key_data, uint32_t key_size,
  59                       const void *kernel_data, uint32_t kernel_size,
  60                       const void *prog_data, uint32_t prog_data_size,
  61                       const struct anv_pipeline_bind_map *bind_map)
  62 {
  63    const size_t size =
  64       anv_shader_bin_size(prog_data_size, key_size,
  65                           bind_map->surface_count, bind_map->sampler_count);
  66
  67    struct anv_shader_bin *shader =
  68       anv_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
  69    if (!shader)
  70       return NULL;
  71
  72    shader->ref_cnt = 1;
  73
  74    shader->kernel =
  75       anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
  76    memcpy(shader->kernel.map, kernel_data, kernel_size);
  77    shader->kernel_size = kernel_size;
  78    shader->bind_map = *bind_map;
  79    shader->prog_data_size = prog_data_size;
  80
  81    /* Now we fill out the floating data at the end */
  82    void *data = shader;
  83    data += align_u32(sizeof(struct anv_shader_bin), 8);
  84
  85    memcpy(data, prog_data, prog_data_size);
  86    data += align_u32(prog_data_size, 8);
  87
  88    struct shader_bin_key *key = data;
  89    key->size = key_size;
  90    memcpy(key->data, key_data, key_size);
  91    data += align_u32(sizeof(*key) + key_size, 8);
  92
  93    shader->bind_map.surface_to_descriptor = data;
  94    memcpy(data, bind_map->surface_to_descriptor,
  95           bind_map->surface_count * sizeof(struct anv_pipeline_binding));
  96    data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);
  97
  98    shader->bind_map.sampler_to_descriptor = data;
  99    memcpy(data, bind_map->sampler_to_descriptor,
 100           bind_map->sampler_count * sizeof(struct anv_pipeline_binding));
 101
 102    return shader;
 103 }
 104
 105 void
 106 anv_shader_bin_destroy(struct anv_device *device,
 107                        struct anv_shader_bin *shader)
 108 {
 109    assert(shader->ref_cnt == 0);
 110    anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
 111    anv_free(&device->alloc, shader);
 112 }
 113
 114 static size_t
 115 anv_shader_bin_data_size(const struct anv_shader_bin *shader)
 116 {
 117    return anv_shader_bin_size(shader->prog_data_size,
 118                               anv_shader_bin_get_key(shader)->size,
 119                               shader->bind_map.surface_count,
 120                               shader->bind_map.sampler_count) +
 121           align_u32(shader->kernel_size, 8);
 122 }
 123
 124 static void
 125 anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
 126 {
 127    size_t struct_size =
 128       anv_shader_bin_size(shader->prog_data_size,
 129                           anv_shader_bin_get_key(shader)->size,
 130                           shader->bind_map.surface_count,
 131                           shader->bind_map.sampler_count);
 132
 133    memcpy(data, shader, struct_size);
 134    data += struct_size;
 135
 136    memcpy(data, shader->kernel.map, shader->kernel_size);
 137 }
 138
 139 /* Remaining work:
 140  *
 141  * - Compact binding table layout so it's tight and not dependent on
 142  *   descriptor set layout.
 143  *
 144  * - Review prog_data struct for size and cacheability: struct
 145  *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
 146  *   bit quantities etc; param, pull_param, and image_params are pointers, we
 *   just need the compaction map. Use bit fields for all bools, e.g.
 *   dual_src_blend.
 149  */
 150
 151 static uint32_t
 152 shader_bin_key_hash_func(const void *void_key)
 153 {
 154    const struct shader_bin_key *key = void_key;
 155    return _mesa_hash_data(key->data, key->size);
 156 }
 157
 158 static bool
 159 shader_bin_key_compare_func(const void *void_a, const void *void_b)
 160 {
 161    const struct shader_bin_key *a = void_a, *b = void_b;
 162    if (a->size != b->size)
 163       return false;
 164
 165    return memcmp(a->data, b->data, a->size) == 0;
 166 }
 167
 168 void
 169 anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
 170                         struct anv_device *device,
 171                         bool cache_enabled)
 172 {
 173    cache->device = device;
 174    pthread_mutex_init(&cache->mutex, NULL);
 175
 176    if (cache_enabled) {
 177       cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
 178                                              shader_bin_key_compare_func);
 179    } else {
 180       cache->cache = NULL;
 181    }
 182 }
 183
 184 void
 185 anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
 186 {
 187    pthread_mutex_destroy(&cache->mutex);
 188
 189    if (cache->cache) {
 190       /* This is a bit unfortunate.  In order to keep things from randomly
 191        * going away, the shader cache has to hold a reference to all shader
 192        * binaries it contains.  We unref them when we destroy the cache.
 193        */
 194       struct hash_entry *entry;
 195       hash_table_foreach(cache->cache, entry)
 196          anv_shader_bin_unref(cache->device, entry->data);
 197
 198       _mesa_hash_table_destroy(cache->cache, NULL);
 199    }
 200 }
 201
 202 void
 203 anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
 204                 struct anv_shader_module *module,
 205                 const char *entrypoint,
 206                 const struct anv_pipeline_layout *pipeline_layout,
 207                 const VkSpecializationInfo *spec_info)
 208 {
 209    struct mesa_sha1 *ctx;
 210
 211    ctx = _mesa_sha1_init();
 212    _mesa_sha1_update(ctx, key, key_size);
 213    _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
 214    _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
 215    if (pipeline_layout) {
 216       _mesa_sha1_update(ctx, pipeline_layout->sha1,
 217                         sizeof(pipeline_layout->sha1));
 218    }
 219    /* hash in shader stage, pipeline layout? */
 220    if (spec_info) {
 221       _mesa_sha1_update(ctx, spec_info->pMapEntries,
 222                         spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
 223       _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
 224    }
 225    _mesa_sha1_final(ctx, hash);
 226 }
 227
 228 static struct anv_shader_bin *
 229 anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
 230                                  const void *key_data, uint32_t key_size)
 231 {
 232    uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
 233    struct shader_bin_key *key = (void *)vla;
 234    key->size = key_size;
 235    memcpy(key->data, key_data, key_size);
 236
 237    struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
 238    if (entry)
 239       return entry->data;
 240    else
 241       return NULL;
 242 }
 243
 244 struct anv_shader_bin *
 245 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
 246                           const void *key_data, uint32_t key_size)
 247 {
 248    if (!cache->cache)
 249       return NULL;
 250
 251    pthread_mutex_lock(&cache->mutex);
 252
 253    struct anv_shader_bin *shader =
 254       anv_pipeline_cache_search_locked(cache, key_data, key_size);
 255
 256    pthread_mutex_unlock(&cache->mutex);
 257
 258    /* We increment refcount before handing it to the caller */
 259    if (shader)
 260       anv_shader_bin_ref(shader);
 261
 262    return shader;
 263 }
 264
 265 static struct anv_shader_bin *
 266 anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
 267                               const void *key_data, uint32_t key_size,
 268                               const void *kernel_data, uint32_t kernel_size,
 269                               const void *prog_data, uint32_t prog_data_size,
 270                               const struct anv_pipeline_bind_map *bind_map)
 271 {
 272    struct anv_shader_bin *shader =
 273       anv_pipeline_cache_search_locked(cache, key_data, key_size);
 274    if (shader)
 275       return shader;
 276
 277    struct anv_shader_bin *bin =
 278       anv_shader_bin_create(cache->device, key_data, key_size,
 279                             kernel_data, kernel_size,
 280                             prog_data, prog_data_size, bind_map);
 281    if (!bin)
 282       return NULL;
 283
 284    _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin);
 285
 286    return bin;
 287 }
 288
 289 struct anv_shader_bin *
 290 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
 291                                  const void *key_data, uint32_t key_size,
 292                                  const void *kernel_data, uint32_t kernel_size,
 293                                  const void *prog_data, uint32_t prog_data_size,
 294                                  const struct anv_pipeline_bind_map *bind_map)
 295 {
 296    if (cache->cache) {
 297       pthread_mutex_lock(&cache->mutex);
 298
 299       struct anv_shader_bin *bin =
 300          anv_pipeline_cache_add_shader(cache, key_data, key_size,
 301                                        kernel_data, kernel_size,
 302                                        prog_data, prog_data_size, bind_map);
 303
 304       pthread_mutex_unlock(&cache->mutex);
 305
 306       /* We increment refcount before handing it to the caller */
 307       anv_shader_bin_ref(bin);
 308
 309       return bin;
 310    } else {
 311       /* In this case, we're not caching it so the caller owns it entirely */
 312       return anv_shader_bin_create(cache->device, key_data, key_size,
 313                                    kernel_data, kernel_size,
 314                                    prog_data, prog_data_size, bind_map);
 315    }
 316 }
 317
 318 struct cache_header {
 319    uint32_t header_size;
 320    uint32_t header_version;
 321    uint32_t vendor_id;
 322    uint32_t device_id;
 323    uint8_t  uuid[VK_UUID_SIZE];
 324 };
 325
 326 static void
 327 anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
 328                         const void *data, size_t size)
 329 {
 330    struct anv_device *device = cache->device;
 331    struct cache_header header;
 332    uint8_t uuid[VK_UUID_SIZE];
 333
 334    if (cache->cache == NULL)
 335       return;
 336
 337    if (size < sizeof(header))
 338       return;
 339    memcpy(&header, data, sizeof(header));
 340    if (header.header_size < sizeof(header))
 341       return;
 342    if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
 343       return;
 344    if (header.vendor_id != 0x8086)
 345       return;
 346    if (header.device_id != device->chipset_id)
 347       return;
 348    anv_device_get_cache_uuid(uuid);
 349    if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
 350       return;
 351
 352    const void *end = data + size;
 353    const void *p = data + header.header_size;
 354
 355    /* Count is the total number of valid entries */
 356    uint32_t count;
 357    if (p + sizeof(count) >= end)
 358       return;
 359    memcpy(&count, p, sizeof(count));
 360    p += align_u32(sizeof(count), 8);
 361
 362    for (uint32_t i = 0; i < count; i++) {
 363       struct anv_shader_bin bin;
 364       if (p + sizeof(bin) > end)
 365          break;
 366       memcpy(&bin, p, sizeof(bin));
 367       p += align_u32(sizeof(struct anv_shader_bin), 8);
 368
 369       const void *prog_data = p;
 370       p += align_u32(bin.prog_data_size, 8);
 371
 372       struct shader_bin_key key;
 373       if (p + sizeof(key) > end)
 374          break;
 375       memcpy(&key, p, sizeof(key));
 376       const void *key_data = p + sizeof(key);
 377       p += align_u32(sizeof(key) + key.size, 8);
 378
 379       /* We're going to memcpy this so getting rid of const is fine */
 380       struct anv_pipeline_binding *bindings = (void *)p;
 381       p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
 382                      sizeof(struct anv_pipeline_binding), 8);
 383       bin.bind_map.surface_to_descriptor = bindings;
 384       bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;
 385
 386       const void *kernel_data = p;
 387       p += align_u32(bin.kernel_size, 8);
 388
 389       if (p > end)
 390          break;
 391
 392       anv_pipeline_cache_add_shader(cache, key_data, key.size,
 393                                     kernel_data, bin.kernel_size,
 394                                     prog_data, bin.prog_data_size,
 395                                     &bin.bind_map);
 396    }
 397 }
 398
 399 static bool
 400 pipeline_cache_enabled()
 401 {
 402    static int enabled = -1;
 403    if (enabled < 0)
 404       enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
 405    return enabled;
 406 }
 407
 408 VkResult anv_CreatePipelineCache(
 409     VkDevice                                    _device,
 410     const VkPipelineCacheCreateInfo*            pCreateInfo,
 411     const VkAllocationCallbacks*                pAllocator,
 412     VkPipelineCache*                            pPipelineCache)
 413 {
 414    ANV_FROM_HANDLE(anv_device, device, _device);
 415    struct anv_pipeline_cache *cache;
 416
 417    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
 418    assert(pCreateInfo->flags == 0);
 419
 420    cache = anv_alloc2(&device->alloc, pAllocator,
 421                        sizeof(*cache), 8,
 422                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
 423    if (cache == NULL)
 424       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 425
 426    anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());
 427
 428    if (pCreateInfo->initialDataSize > 0)
 429       anv_pipeline_cache_load(cache,
 430                               pCreateInfo->pInitialData,
 431                               pCreateInfo->initialDataSize);
 432
 433    *pPipelineCache = anv_pipeline_cache_to_handle(cache);
 434
 435    return VK_SUCCESS;
 436 }
 437
 438 void anv_DestroyPipelineCache(
 439     VkDevice                                    _device,
 440     VkPipelineCache                             _cache,
 441     const VkAllocationCallbacks*                pAllocator)
 442 {
 443    ANV_FROM_HANDLE(anv_device, device, _device);
 444    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 445
 446    anv_pipeline_cache_finish(cache);
 447
 448    anv_free2(&device->alloc, pAllocator, cache);
 449 }
 450
 451 VkResult anv_GetPipelineCacheData(
 452     VkDevice                                    _device,
 453     VkPipelineCache                             _cache,
 454     size_t*                                     pDataSize,
 455     void*                                       pData)
 456 {
 457    ANV_FROM_HANDLE(anv_device, device, _device);
 458    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 459    struct cache_header *header;
 460
 461    if (pData == NULL) {
 462       size_t size = align_u32(sizeof(*header), 8) +
 463                     align_u32(sizeof(uint32_t), 8);
 464
 465       if (cache->cache) {
 466          struct hash_entry *entry;
 467          hash_table_foreach(cache->cache, entry)
 468             size += anv_shader_bin_data_size(entry->data);
 469       }
 470
 471       *pDataSize = size;
 472       return VK_SUCCESS;
 473    }
 474
 475    if (*pDataSize < sizeof(*header)) {
 476       *pDataSize = 0;
 477       return VK_INCOMPLETE;
 478    }
 479
 480    void *p = pData, *end = pData + *pDataSize;
 481    header = p;
 482    header->header_size = sizeof(*header);
 483    header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
 484    header->vendor_id = 0x8086;
 485    header->device_id = device->chipset_id;
 486    anv_device_get_cache_uuid(header->uuid);
 487    p += align_u32(header->header_size, 8);
 488
 489    uint32_t *count = p;
 490    p += align_u32(sizeof(*count), 8);
 491    *count = 0;
 492
 493    if (cache->cache) {
 494       struct hash_entry *entry;
 495       hash_table_foreach(cache->cache, entry) {
 496          struct anv_shader_bin *shader = entry->data;
 497          size_t data_size = anv_shader_bin_data_size(entry->data);
 498          if (p + data_size > end)
 499             break;
 500
 501          anv_shader_bin_write_data(shader, p);
 502          p += data_size;
 503
 504          (*count)++;
 505       }
 506    }
 507
 508    *pDataSize = p - pData;
 509
 510    return VK_SUCCESS;
 511 }
 512
 513 VkResult anv_MergePipelineCaches(
 514     VkDevice                                    _device,
 515     VkPipelineCache                             destCache,
 516     uint32_t                                    srcCacheCount,
 517     const VkPipelineCache*                      pSrcCaches)
 518 {
 519    ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
 520
 521    if (!dst->cache)
 522       return VK_SUCCESS;
 523
 524    for (uint32_t i = 0; i < srcCacheCount; i++) {
 525       ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
 526       if (!src->cache)
 527          continue;
 528
 529       struct hash_entry *entry;
 530       hash_table_foreach(src->cache, entry) {
 531          struct anv_shader_bin *bin = entry->data;
 532          if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin)))
 533             continue;
 534
 535          anv_shader_bin_ref(bin);
 536          _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin);
 537       }
 538    }
 539
 540    return VK_SUCCESS;
 541 }