OSDN Git Service

radv: clean up radv_vi_dcc_enabled()
[android-x86/external-mesa.git] / src / amd / vulkan / radv_image.c
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "gfx9d.h"
35 #include "util/debug.h"
36 #include "util/u_atomic.h"
37 static unsigned
38 radv_choose_tiling(struct radv_device *device,
39                    const struct radv_image_create_info *create_info)
40 {
41         const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
42
43         if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
44                 assert(pCreateInfo->samples <= 1);
45                 return RADEON_SURF_MODE_LINEAR_ALIGNED;
46         }
47
48         if (!vk_format_is_compressed(pCreateInfo->format) &&
49             !vk_format_is_depth_or_stencil(pCreateInfo->format)
50             && device->physical_device->rad_info.chip_class <= VI) {
51                 /* this causes hangs in some VK CTS tests on GFX9. */
52                 /* Textures with a very small height are recommended to be linear. */
53                 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
54                     /* Only very thin and long 2D textures should benefit from
55                      * linear_aligned. */
56                     (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
57                         return RADEON_SURF_MODE_LINEAR_ALIGNED;
58         }
59
60         /* MSAA resources must be 2D tiled. */
61         if (pCreateInfo->samples > 1)
62                 return RADEON_SURF_MODE_2D;
63
64         return RADEON_SURF_MODE_2D;
65 }
66
67 static bool
68 radv_image_is_tc_compat_htile(struct radv_device *device,
69                               const VkImageCreateInfo *pCreateInfo)
70 {
71         /* TC-compat HTILE is only available for GFX8+. */
72         if (device->physical_device->rad_info.chip_class < VI)
73                 return false;
74
75         if (pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT)
76                 return false;
77
78         if (pCreateInfo->flags & (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
79                                   VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR))
80                 return false;
81
82         if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
83                 return false;
84
85         if (pCreateInfo->mipLevels > 1)
86                 return false;
87
88         /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
89          * tests - disable for now */
90         if (pCreateInfo->samples >= 2 &&
91             pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
92                 return false;
93
94         /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
95          * supports 32-bit. Though, it's possible to enable TC-compat for
96          * 16-bit depth surfaces if no Z planes are compressed.
97          */
98         if (pCreateInfo->format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
99             pCreateInfo->format != VK_FORMAT_D32_SFLOAT &&
100             pCreateInfo->format != VK_FORMAT_D16_UNORM)
101                 return false;
102
103         return true;
104 }
105
/* Fill in the radeon_surf describing an image being created: block
 * dimensions, bytes-per-element, surface type/mode flags and the
 * HTILE/DCC/scanout feature flags.
 *
 * Returns 0; this function currently has no failure paths.
 */
static int
radv_init_surface(struct radv_device *device,
                  struct radeon_surf *surface,
                  const struct radv_image_create_info *create_info)
{
        const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
        unsigned array_mode = radv_choose_tiling(device, create_info);
        const struct vk_format_description *desc =
                vk_format_description(pCreateInfo->format);
        bool is_depth, is_stencil, blendable;

        is_depth = vk_format_has_depth(desc);
        is_stencil = vk_format_has_stencil(desc);

        surface->blk_w = vk_format_get_blockwidth(pCreateInfo->format);
        surface->blk_h = vk_format_get_blockheight(pCreateInfo->format);

        /* For combined depth/stencil formats, bpe describes the depth
         * plane only (stencil is a separate plane). */
        surface->bpe = vk_format_get_blocksize(vk_format_depth_only(pCreateInfo->format));
        /* align byte per element on dword */
        if (surface->bpe == 3) {
                surface->bpe = 4;
        }
        surface->flags = RADEON_SURF_SET(array_mode, MODE);

        /* Translate the Vulkan image type (plus array layering) into the
         * radeon_surf surface type. */
        switch (pCreateInfo->imageType){
        case VK_IMAGE_TYPE_1D:
                if (pCreateInfo->arrayLayers > 1)
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
                else
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
                break;
        case VK_IMAGE_TYPE_2D:
                if (pCreateInfo->arrayLayers > 1)
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
                else
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
                break;
        case VK_IMAGE_TYPE_3D:
                surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
                break;
        default:
                unreachable("unhandled image type");
        }

        if (is_depth) {
                surface->flags |= RADEON_SURF_ZBUFFER;
                if (radv_image_is_tc_compat_htile(device, pCreateInfo))
                        surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
        }

        if (is_stencil)
                surface->flags |= RADEON_SURF_SBUFFER;

        surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;

        /* DCC requires all view formats of the image to be DCC-compatible
         * with the base format. With MUTABLE_FORMAT the set of view formats
         * comes from VkImageFormatListCreateInfoKHR; if no usable list is
         * chained, any reinterpretation must be assumed and DCC disabled. */
        bool dcc_compatible_formats = radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable);
        if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
                const struct  VkImageFormatListCreateInfoKHR *format_list =
                          (const struct  VkImageFormatListCreateInfoKHR *)
                                vk_find_struct_const(pCreateInfo->pNext,
                                                     IMAGE_FORMAT_LIST_CREATE_INFO_KHR);

                /* We have to ignore the existence of the list if viewFormatCount = 0 */
                if (format_list && format_list->viewFormatCount) {
                        /* compatibility is transitive, so we only need to check
                         * one format with everything else. */
                        for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
                                if (!radv_dcc_formats_compatible(pCreateInfo->format,
                                                                 format_list->pViewFormats[i]))
                                        dcc_compatible_formats = false;
                        }
                } else {
                        dcc_compatible_formats = false;
                }
        }

        /* Disable DCC whenever it cannot work: storage/extended usage,
         * incompatible view formats, linear tiling, mipmaps or array layers,
         * pre-VI chips, scanout surfaces, MSAA, or the NO_DCC debug flag. */
        if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
            (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR) ||
            !dcc_compatible_formats ||
            (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
            pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
            device->physical_device->rad_info.chip_class < VI ||
            create_info->scanout || (device->instance->debug_flags & RADV_DEBUG_NO_DCC) ||
            pCreateInfo->samples >= 2)
                surface->flags |= RADEON_SURF_DISABLE_DCC;
        if (create_info->scanout)
                surface->flags |= RADEON_SURF_SCANOUT;
        return 0;
}
195
196 static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
197 {
198         return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
199 }
200
201 static inline unsigned
202 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
203 {
204         if (stencil)
205                 return image->surface.u.legacy.stencil_tiling_index[level];
206         else
207                 return image->surface.u.legacy.tiling_index[level];
208 }
209
210 static unsigned radv_map_swizzle(unsigned swizzle)
211 {
212         switch (swizzle) {
213         case VK_SWIZZLE_Y:
214                 return V_008F0C_SQ_SEL_Y;
215         case VK_SWIZZLE_Z:
216                 return V_008F0C_SQ_SEL_Z;
217         case VK_SWIZZLE_W:
218                 return V_008F0C_SQ_SEL_W;
219         case VK_SWIZZLE_0:
220                 return V_008F0C_SQ_SEL_0;
221         case VK_SWIZZLE_1:
222                 return V_008F0C_SQ_SEL_1;
223         default: /* VK_SWIZZLE_X */
224                 return V_008F0C_SQ_SEL_X;
225         }
226 }
227
/* Build the 4-dword typed buffer (texel buffer) descriptor.
 *
 * @param device    used only for the chip-class quirk below
 * @param buffer    buffer backing the view; the base VA comes from its BO
 * @param vk_format element format of the view
 * @param offset    byte offset into the buffer
 * @param range     view size in bytes
 * @param state     output: the 4 descriptor dwords
 */
static void
radv_make_buffer_descriptor(struct radv_device *device,
                            struct radv_buffer *buffer,
                            VkFormat vk_format,
                            unsigned offset,
                            unsigned range,
                            uint32_t *state)
{
        const struct vk_format_description *desc;
        unsigned stride;
        uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
        uint64_t va = gpu_address + buffer->offset;
        unsigned num_format, data_format;
        int first_non_void;
        desc = vk_format_description(vk_format);
        first_non_void = vk_format_get_first_non_void_channel(vk_format);
        stride = desc->block.bits / 8;

        num_format = radv_translate_buffer_numformat(desc, first_non_void);
        data_format = radv_translate_buffer_dataformat(desc, first_non_void);

        va += offset;
        state[0] = va;
        state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
                S_008F04_STRIDE(stride);

        /* NOTE(review): NUM_RECORDS (dword 2) is stored in elements
         * (range / stride) on every chip except VI, where the raw byte
         * range is kept — presumably VI interprets NUM_RECORDS differently
         * when STRIDE is non-zero; confirm against the register spec. */
        if (device->physical_device->rad_info.chip_class != VI && stride) {
                range /= stride;
        }

        state[2] = range;
        state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
                   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
                   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
                   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3])) |
                   S_008F0C_NUM_FORMAT(num_format) |
                   S_008F0C_DATA_FORMAT(data_format);
}
266
/* Patch the descriptor dwords that depend on where the image is bound:
 * base address, tiling/swizzle mode, pitch, and the compression metadata
 * (DCC or TC-compatible HTILE) address. The immutable fields must already
 * have been written by si_make_texture_descriptor().
 *
 * @param base_level_info  level info of the base level (pre-GFX9 layouts)
 * @param base_level       level index used to pick the tile mode (pre-GFX9)
 * @param first_level      first mip accessible through this view
 * @param block_width      block width, to convert nblk_x into a pixel pitch
 * @param is_stencil       select the stencil plane's layout
 * @param is_storage_image storage images cannot go through compression
 * @param state            the 8-dword image descriptor, patched in place
 */
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
                               struct radv_image *image,
                               const struct legacy_surf_level *base_level_info,
                               unsigned base_level, unsigned first_level,
                               unsigned block_width, bool is_stencil,
                               bool is_storage_image, uint32_t *state)
{
        uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
        uint64_t va = gpu_address;
        enum chip_class chip_class = device->physical_device->rad_info.chip_class;
        uint64_t meta_va = 0;
        /* GFX9 stores per-plane offsets in the surface; older chips store
         * per-level offsets. */
        if (chip_class >= GFX9) {
                if (is_stencil)
                        va += image->surface.u.gfx9.stencil_offset;
                else
                        va += image->surface.u.gfx9.surf_offset;
        } else
                va += base_level_info->offset;

        state[0] = va >> 8;
        if (chip_class >= GFX9 ||
            base_level_info->mode == RADEON_SURF_MODE_2D)
                state[0] |= image->surface.tile_swizzle;
        state[1] &= C_008F14_BASE_ADDRESS_HI;
        state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

        if (chip_class >= VI) {
                state[6] &= C_008F28_COMPRESSION_EN;
                state[7] = 0;
                /* Pick the compression metadata address: DCC if enabled for
                 * the view's first level, otherwise TC-compatible HTILE.
                 * Storage images never read through compression. */
                if (!is_storage_image && radv_dcc_enabled(image, first_level)) {
                        meta_va = gpu_address + image->dcc_offset;
                        if (chip_class <= VI)
                                meta_va += base_level_info->dcc_offset;
                } else if(!is_storage_image && image->tc_compatible_htile &&
                          radv_image_has_htile(image)) {
                        meta_va = gpu_address + image->htile_offset;
                }

                if (meta_va) {
                        state[6] |= S_008F28_COMPRESSION_EN(1);
                        state[7] = meta_va >> 8;
                        state[7] |= image->surface.tile_swizzle;
                }
        }

        if (chip_class >= GFX9) {
                state[3] &= C_008F1C_SW_MODE;
                state[4] &= C_008F20_PITCH_GFX9;

                if (is_stencil) {
                        state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.stencil.swizzle_mode);
                        state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.stencil.epitch);
                } else {
                        state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.surf.swizzle_mode);
                        state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.surf.epitch);
                }

                /* Metadata address high bits plus pipe/RB alignment flags
                 * for DCC or HTILE. */
                state[5] &= C_008F24_META_DATA_ADDRESS &
                            C_008F24_META_PIPE_ALIGNED &
                            C_008F24_META_RB_ALIGNED;
                if (meta_va) {
                        struct gfx9_surf_meta_flags meta;

                        if (image->dcc_offset)
                                meta = image->surface.u.gfx9.dcc;
                        else
                                meta = image->surface.u.gfx9.htile;

                        state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                                    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                                    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
                }
        } else {
                /* SI-CI-VI */
                unsigned pitch = base_level_info->nblk_x * block_width;
                unsigned index = si_tile_mode_index(image, base_level, is_stencil);

                state[3] &= C_008F1C_TILING_INDEX;
                state[3] |= S_008F1C_TILING_INDEX(index);
                state[4] &= C_008F20_PITCH_GFX6;
                state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
        }
}
351
352 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
353                              unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
354 {
355         if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
356                 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
357
358         /* GFX9 allocates 1D textures as 2D. */
359         if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
360                 image_type = VK_IMAGE_TYPE_2D;
361         switch (image_type) {
362         case VK_IMAGE_TYPE_1D:
363                 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
364         case VK_IMAGE_TYPE_2D:
365                 if (nr_samples > 1)
366                         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
367                 else
368                         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
369         case VK_IMAGE_TYPE_3D:
370                 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
371                         return V_008F1C_SQ_RSRC_IMG_3D;
372                 else
373                         return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
374         default:
375                 unreachable("illegale image type");
376         }
377 }
378
379 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
380 {
381         unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
382
383         if (swizzle[3] == VK_SWIZZLE_X) {
384                 /* For the pre-defined border color values (white, opaque
385                  * black, transparent black), the only thing that matters is
386                  * that the alpha channel winds up in the correct place
387                  * (because the RGB channels are all the same) so either of
388                  * these enumerations will work.
389                  */
390                 if (swizzle[2] == VK_SWIZZLE_Y)
391                         bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
392                 else
393                         bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
394         } else if (swizzle[0] == VK_SWIZZLE_X) {
395                 if (swizzle[1] == VK_SWIZZLE_Y)
396                         bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
397                 else
398                         bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
399         } else if (swizzle[1] == VK_SWIZZLE_X) {
400                 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
401         } else if (swizzle[2] == VK_SWIZZLE_X) {
402                 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
403         }
404
405         return bc_swizzle;
406 }
407
/**
 * Build the sampler view descriptor for a texture.
 *
 * Writes the 8 immutable dwords of the image descriptor into @state and,
 * if the image has FMASK, the FMASK descriptor into @fmask_state
 * (otherwise @fmask_state, when non-NULL, is zeroed). The address and
 * tiling fields are filled in later by si_set_mutable_tex_desc_fields().
 */
static void
si_make_texture_descriptor(struct radv_device *device,
                           struct radv_image *image,
                           bool is_storage_image,
                           VkImageViewType view_type,
                           VkFormat vk_format,
                           const VkComponentMapping *mapping,
                           unsigned first_level, unsigned last_level,
                           unsigned first_layer, unsigned last_layer,
                           unsigned width, unsigned height, unsigned depth,
                           uint32_t *state,
                           uint32_t *fmask_state)
{
        const struct vk_format_description *desc;
        enum vk_swizzle swizzle[4];
        int first_non_void;
        unsigned num_format, data_format, type;

        desc = vk_format_description(vk_format);

        /* Depth/stencil formats broadcast the X channel; color formats use
         * the format's own swizzle as the base. */
        if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
                const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
                vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
        } else {
                vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
        }

        first_non_void = vk_format_get_first_non_void_channel(vk_format);

        num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
        if (num_format == ~0) {
                num_format = 0;
        }

        data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
        if (data_format == ~0) {
                data_format = 0;
        }

        /* S8 with either Z16 or Z32 HTILE need a special format. */
        if (device->physical_device->rad_info.chip_class >= GFX9 &&
            vk_format == VK_FORMAT_S8_UINT &&
            image->tc_compatible_htile) {
                if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
                        data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
                else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
                        data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
        }
        type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                            is_storage_image, device->physical_device->rad_info.chip_class >= GFX9);
        /* For array/cube types, "depth" carries the layer count instead of
         * the 3D depth. */
        if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
                height = 1;
                depth = image->info.array_size;
        } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
                   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
                if (view_type != VK_IMAGE_VIEW_TYPE_3D)
                        depth = image->info.array_size;
        } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
                depth = image->info.array_size / 6;

        state[0] = 0;
        state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
                    S_008F14_NUM_FORMAT_GFX6(num_format));
        state[2] = (S_008F18_WIDTH(width - 1) |
                    S_008F18_HEIGHT(height - 1) |
                    S_008F18_PERF_MOD(4));
        /* For MSAA textures, BASE_LEVEL is 0 and LAST_LEVEL encodes
         * log2(samples) instead of the mip range. */
        state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
                    S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
                    S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
                    S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
                    S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
                                        0 : first_level) |
                    S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
                                        util_logbase2(image->info.samples) :
                                        last_level) |
                    S_008F1C_TYPE(type));
        state[4] = 0;
        state[5] = S_008F24_BASE_ARRAY(first_layer);
        state[6] = 0;
        state[7] = 0;

        if (device->physical_device->rad_info.chip_class >= GFX9) {
                unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);

                /* Depth is the last accessible layer on Gfx9.
                 * The hw doesn't need to know the total number of layers.
                 */
                if (type == V_008F1C_SQ_RSRC_IMG_3D)
                        state[4] |= S_008F20_DEPTH(depth - 1);
                else
                        state[4] |= S_008F20_DEPTH(last_layer);

                state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
                state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
                                             util_logbase2(image->info.samples) :
                                             image->info.levels - 1);
        } else {
                state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
                state[4] |= S_008F20_DEPTH(depth - 1);
                state[5] |= S_008F24_LAST_ARRAY(last_layer);
        }
        if (image->dcc_offset) {
                unsigned swap = radv_translate_colorswap(vk_format, FALSE);

                state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
        } else {
                /* The last dword is unused by hw. The shader uses it to clear
                 * bits in the first dword of sampler state.
                 */
                if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) {
                        if (first_level == last_level)
                                state[7] = C_008F30_MAX_ANISO_RATIO;
                        else
                                state[7] = 0xffffffff;
                }
        }

        /* Initialize the sampler view for FMASK. */
        if (radv_image_has_fmask(image)) {
                uint32_t fmask_format, num_format;
                uint64_t gpu_address = radv_buffer_get_va(image->bo);
                uint64_t va;

                va = gpu_address + image->offset + image->fmask.offset;

                /* GFX9 uses a single FMASK data format plus a per-sample-count
                 * num format; older chips encode the sample/fragment counts in
                 * the data format itself. */
                if (device->physical_device->rad_info.chip_class >= GFX9) {
                        fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
                        switch (image->info.samples) {
                        case 2:
                                num_format = V_008F14_IMG_FMASK_8_2_2;
                                break;
                        case 4:
                                num_format = V_008F14_IMG_FMASK_8_4_4;
                                break;
                        case 8:
                                num_format = V_008F14_IMG_FMASK_32_8_8;
                                break;
                        default:
                                unreachable("invalid nr_samples");
                        }
                } else {
                        switch (image->info.samples) {
                        case 2:
                                fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
                                break;
                        case 4:
                                fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
                                break;
                        case 8:
                                fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
                                break;
                        default:
                                assert(0);
                                fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
                        }
                        num_format = V_008F14_IMG_NUM_FORMAT_UINT;
                }

                fmask_state[0] = va >> 8;
                fmask_state[0] |= image->fmask.tile_swizzle;
                fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
                        S_008F14_DATA_FORMAT_GFX6(fmask_format) |
                        S_008F14_NUM_FORMAT_GFX6(num_format);
                fmask_state[2] = S_008F18_WIDTH(width - 1) |
                        S_008F18_HEIGHT(height - 1);
                fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
                        S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
                        S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
                        S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
                        S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false, false));
                fmask_state[4] = 0;
                fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
                fmask_state[6] = 0;
                fmask_state[7] = 0;

                if (device->physical_device->rad_info.chip_class >= GFX9) {
                        fmask_state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.fmask.swizzle_mode);
                        fmask_state[4] |= S_008F20_DEPTH(last_layer) |
                                          S_008F20_PITCH_GFX9(image->surface.u.gfx9.fmask.epitch);
                        fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->surface.u.gfx9.cmask.pipe_aligned) |
                                          S_008F24_META_RB_ALIGNED(image->surface.u.gfx9.cmask.rb_aligned);
                } else {
                        fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
                        fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
                                S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
                        fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
                }
        } else if (fmask_state)
                memset(fmask_state, 0, 8 * 4);
}
601
/* Fill the opaque (driver-interpreted) part of the BO metadata with a
 * full image descriptor and, on pre-GFX9, the per-level offsets, so that
 * other processes importing this BO can reconstruct the layout. */
static void
radv_query_opaque_metadata(struct radv_device *device,
                           struct radv_image *image,
                           struct radeon_bo_metadata *md)
{
        static const VkComponentMapping fixedmapping;
        uint32_t desc[8], i;

        /* Metadata image format version 1:
         * [0] = 1 (metadata format identifier)
         * [1] = (VENDOR_ID << 16) | PCI_ID
         * [2:9] = image descriptor for the whole resource
         *         [2] is always 0, because the base address is cleared
         *         [9] is the DCC offset bits [39:8] from the beginning of
         *             the buffer
         * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
         */
        md->metadata[0] = 1; /* metadata image format version 1 */

        /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
        md->metadata[1] = si_get_bo_metadata_word1(device);


        /* Build a descriptor covering the whole resource. */
        si_make_texture_descriptor(device, image, false,
                                   (VkImageViewType)image->type, image->vk_format,
                                   &fixedmapping, 0, image->info.levels - 1, 0,
                                   image->info.array_size,
                                   image->info.width, image->info.height,
                                   image->info.depth,
                                   desc, NULL);

        si_set_mutable_tex_desc_fields(device, image, &image->surface.u.legacy.level[0], 0, 0,
                                       image->surface.blk_w, false, false, desc);

        /* Clear the base address and set the relative DCC offset. */
        desc[0] = 0;
        desc[1] &= C_008F14_BASE_ADDRESS_HI;
        desc[7] = image->dcc_offset >> 8;

        /* Dwords [2:9] contain the image descriptor. */
        memcpy(&md->metadata[2], desc, sizeof(desc));

        /* Dwords [10:..] contain the mipmap level offsets. */
        if (device->physical_device->rad_info.chip_class <= VI) {
                for (i = 0; i <= image->info.levels - 1; i++)
                        md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8;
                md->size_metadata = (11 + image->info.levels - 1) * 4;
        }
}
651
652 void
653 radv_init_metadata(struct radv_device *device,
654                    struct radv_image *image,
655                    struct radeon_bo_metadata *metadata)
656 {
657         struct radeon_surf *surface = &image->surface;
658
659         memset(metadata, 0, sizeof(*metadata));
660
661         if (device->physical_device->rad_info.chip_class >= GFX9) {
662                 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
663         } else {
664                 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
665                         RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
666                 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
667                         RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
668                 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
669                 metadata->u.legacy.bankw = surface->u.legacy.bankw;
670                 metadata->u.legacy.bankh = surface->u.legacy.bankh;
671                 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
672                 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
673                 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
674                 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
675                 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
676         }
677         radv_query_opaque_metadata(device, image, metadata);
678 }
679
680 /* The number of samples can be specified independently of the texture. */
681 static void
682 radv_image_get_fmask_info(struct radv_device *device,
683                           struct radv_image *image,
684                           unsigned nr_samples,
685                           struct radv_fmask_info *out)
686 {
687         /* FMASK is allocated like an ordinary texture. */
688         struct radeon_surf fmask = {};
689         struct ac_surf_info info = image->info;
690         memset(out, 0, sizeof(*out));
691
692         if (device->physical_device->rad_info.chip_class >= GFX9) {
693                 out->alignment = image->surface.u.gfx9.fmask_alignment;
694                 out->size = image->surface.u.gfx9.fmask_size;
695                 return;
696         }
697
698         fmask.blk_w = image->surface.blk_w;
699         fmask.blk_h = image->surface.blk_h;
700         info.samples = 1;
701         fmask.flags = image->surface.flags | RADEON_SURF_FMASK;
702
703         if (!image->shareable)
704                 info.surf_index = &device->fmask_mrt_offset_counter;
705
706         /* Force 2D tiling if it wasn't set. This may occur when creating
707          * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
708          * destination buffer must have an FMASK too. */
709         fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
710         fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
711
712         switch (nr_samples) {
713         case 2:
714         case 4:
715                 fmask.bpe = 1;
716                 break;
717         case 8:
718                 fmask.bpe = 4;
719                 break;
720         default:
721                 return;
722         }
723
724         device->ws->surface_init(device->ws, &info, &fmask);
725         assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
726
727         out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
728         if (out->slice_tile_max)
729                 out->slice_tile_max -= 1;
730
731         out->tile_mode_index = fmask.u.legacy.tiling_index[0];
732         out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
733         out->bank_height = fmask.u.legacy.bankh;
734         out->tile_swizzle = fmask.tile_swizzle;
735         out->alignment = MAX2(256, fmask.surf_alignment);
736         out->size = fmask.surf_size;
737
738         assert(!out->tile_swizzle || !image->shareable);
739 }
740
741 static void
742 radv_image_alloc_fmask(struct radv_device *device,
743                        struct radv_image *image)
744 {
745         radv_image_get_fmask_info(device, image, image->info.samples, &image->fmask);
746
747         image->fmask.offset = align64(image->size, image->fmask.alignment);
748         image->size = image->fmask.offset + image->fmask.size;
749         image->alignment = MAX2(image->alignment, image->fmask.alignment);
750 }
751
752 static void
753 radv_image_get_cmask_info(struct radv_device *device,
754                           struct radv_image *image,
755                           struct radv_cmask_info *out)
756 {
757         unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes;
758         unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
759         unsigned cl_width, cl_height;
760
761         if (device->physical_device->rad_info.chip_class >= GFX9) {
762                 out->alignment = image->surface.u.gfx9.cmask_alignment;
763                 out->size = image->surface.u.gfx9.cmask_size;
764                 return;
765         }
766
767         switch (num_pipes) {
768         case 2:
769                 cl_width = 32;
770                 cl_height = 16;
771                 break;
772         case 4:
773                 cl_width = 32;
774                 cl_height = 32;
775                 break;
776         case 8:
777                 cl_width = 64;
778                 cl_height = 32;
779                 break;
780         case 16: /* Hawaii */
781                 cl_width = 64;
782                 cl_height = 64;
783                 break;
784         default:
785                 assert(0);
786                 return;
787         }
788
789         unsigned base_align = num_pipes * pipe_interleave_bytes;
790
791         unsigned width = align(image->info.width, cl_width*8);
792         unsigned height = align(image->info.height, cl_height*8);
793         unsigned slice_elements = (width * height) / (8*8);
794
795         /* Each element of CMASK is a nibble. */
796         unsigned slice_bytes = slice_elements / 2;
797
798         out->slice_tile_max = (width * height) / (128*128);
799         if (out->slice_tile_max)
800                 out->slice_tile_max -= 1;
801
802         out->alignment = MAX2(256, base_align);
803         out->size = (image->type == VK_IMAGE_TYPE_3D ? image->info.depth : image->info.array_size) *
804                     align(slice_bytes, base_align);
805 }
806
807 static void
808 radv_image_alloc_cmask(struct radv_device *device,
809                        struct radv_image *image)
810 {
811         uint32_t clear_value_size = 0;
812         radv_image_get_cmask_info(device, image, &image->cmask);
813
814         image->cmask.offset = align64(image->size, image->cmask.alignment);
815         /* + 8 for storing the clear values */
816         if (!image->clear_value_offset) {
817                 image->clear_value_offset = image->cmask.offset + image->cmask.size;
818                 clear_value_size = 8;
819         }
820         image->size = image->cmask.offset + image->cmask.size + clear_value_size;
821         image->alignment = MAX2(image->alignment, image->cmask.alignment);
822 }
823
824 static void
825 radv_image_alloc_dcc(struct radv_image *image)
826 {
827         image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
828         /* + 16 for storing the clear values + dcc pred */
829         image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
830         image->dcc_pred_offset = image->clear_value_offset + 8;
831         image->size = image->dcc_offset + image->surface.dcc_size + 16;
832         image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
833 }
834
835 static void
836 radv_image_alloc_htile(struct radv_image *image)
837 {
838         image->htile_offset = align64(image->size, image->surface.htile_alignment);
839
840         /* + 8 for storing the clear values */
841         image->clear_value_offset = image->htile_offset + image->surface.htile_size;
842         image->size = image->clear_value_offset + 8;
843         image->alignment = align64(image->alignment, image->surface.htile_alignment);
844 }
845
846 static inline bool
847 radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
848 {
849         if (image->info.samples <= 1 &&
850             image->info.width * image->info.height <= 512 * 512) {
851                 /* Do not enable CMASK or DCC for small surfaces where the cost
852                  * of the eliminate pass can be higher than the benefit of fast
853                  * clear. RadeonSI does this, but the image threshold is
854                  * different.
855                  */
856                 return false;
857         }
858
859         return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
860                (image->exclusive || image->queue_family_mask == 1);
861 }
862
static inline bool
radv_image_can_enable_dcc(struct radv_image *image)
{
	/* DCC needs both the generic fast-clear conditions and an actual
	 * DCC surface computed for the image. */
	if (!radv_image_can_enable_dcc_or_cmask(image))
		return false;

	return radv_image_has_dcc(image);
}
869
870 static inline bool
871 radv_image_can_enable_cmask(struct radv_image *image)
872 {
873         if (image->surface.bpe > 8 && image->info.samples == 1) {
874                 /* Do not enable CMASK for non-MSAA images (fast color clear)
875                  * because 128 bit formats are not supported, but FMASK might
876                  * still be used.
877                  */
878                 return false;
879         }
880
881         return radv_image_can_enable_dcc_or_cmask(image) &&
882                image->info.levels == 1 &&
883                image->info.depth == 1 &&
884                !image->surface.is_linear;
885 }
886
887 static inline bool
888 radv_image_can_enable_fmask(struct radv_image *image)
889 {
890         return image->info.samples > 1 && vk_format_is_color(image->vk_format);
891 }
892
893 static inline bool
894 radv_image_can_enable_htile(struct radv_image *image)
895 {
896         return image->info.levels == 1 && vk_format_is_depth(image->vk_format);
897 }
898
/* Create a radv_image from the given create info: validates parameters,
 * computes the surface layout via the winsys, then appends the optional
 * metadata surfaces (DCC/CMASK, FMASK/HTILE) in a fixed, order-dependent
 * sequence (each allocator extends image->size and may assign
 * clear_value_offset). */
VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;
	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	/* Zero-initialized: queue_family_mask, clear_value_offset etc. start
	 * at 0, which the metadata allocators below rely on. */
	image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	/* Copy the creation parameters into the image object. */
	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;

	image->vk_format = pCreateInfo->format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;

	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		/* An external queue family grants access to every family. */
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL_KHR)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}

	image->shareable = vk_find_struct_const(pCreateInfo->pNext,
	                                        EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
	/* Only non-depth, non-scanout, non-shared color images get a
	 * per-device surface index counter. */
	if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
		image->info.surf_index = &device->image_mrt_offset_counter;
	}

	radv_init_surface(device, &image->surface, create_info);

	/* Let the winsys/addrlib compute the actual surface layout. */
	device->ws->surface_init(device->ws, &image->info, &image->surface);

	image->size = image->surface.surf_size;
	image->alignment = image->surface.surf_alignment;

	if (!create_info->no_metadata_planes) {
		/* Try to enable DCC first. */
		if (radv_image_can_enable_dcc(image)) {
			radv_image_alloc_dcc(image);
		} else {
			/* When DCC cannot be enabled, try CMASK. */
			image->surface.dcc_size = 0;
			if (radv_image_can_enable_cmask(image)) {
				radv_image_alloc_cmask(device, image);
			}
		}

		/* Try to enable FMASK for multisampled images. */
		if (radv_image_can_enable_fmask(image)) {
			radv_image_alloc_fmask(device, image);
		} else {
			/* Otherwise, try to enable HTILE for depth surfaces. */
			if (radv_image_can_enable_htile(image) &&
			    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
				radv_image_alloc_htile(image);
				image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
			} else {
				image->surface.htile_size = 0;
			}
		}
	} else {
		image->surface.dcc_size = 0;
		image->surface.htile_size = 0;
	}

	if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
		/* Sparse images are backed by a virtual BO created up front. */
		image->alignment = MAX2(image->alignment, 4096);
		image->size = align64(image->size, image->alignment);
		image->offset = 0;

		image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
						      0, RADEON_FLAG_VIRTUAL);
		if (!image->bo) {
			vk_free2(&device->alloc, alloc, image);
			return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pImage = radv_image_to_handle(image);

	return VK_SUCCESS;
}
1004
/* Build the texture descriptor (sampled or storage variant) for an image
 * view and patch in the mutable per-mip fields. */
static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				const VkComponentMapping *components,
				bool is_storage_image)
{
	struct radv_image *image = iview->image;
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	uint32_t *descriptor;
	uint32_t hw_level = 0;

	/* Sampled and storage views live in separate descriptor slots. */
	if (is_storage_image) {
		descriptor = iview->storage_descriptor;
	} else {
		descriptor = iview->descriptor;
	}

	/* Convert the image's block width into the view format's block
	 * units (image and view may have different compression blocks). */
	assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
	blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);

	/* GFX9 programs the view's base mip as the hardware base level;
	 * older chips pass 0 here. */
	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;
	si_make_texture_descriptor(device, image, is_storage_image,
				   iview->type,
				   iview->vk_format,
				   components,
				   hw_level, hw_level + iview->level_count - 1,
				   iview->base_layer,
				   iview->base_layer + iview->layer_count - 1,
				   iview->extent.width,
				   iview->extent.height,
				   iview->extent.depth,
				   descriptor,
				   descriptor + 8);

	const struct legacy_surf_level *base_level_info = NULL;
	/* NOTE(review): this reads the u.legacy union member even on GFX9
	 * where u.gfx9 is the active layout; presumably
	 * si_set_mutable_tex_desc_fields ignores base_level_info on GFX9 —
	 * confirm. */
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &image->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &image->surface.u.legacy.level[iview->base_mip];
	}
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image, descriptor);
}
1054
/* Initialize an image view: validate the subresource range, compute the
 * view's effective extent (including the GFX9 compressed-format mip
 * workaround below), and build both the sampled and storage descriptors. */
void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo)
{
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		/* For 3D images the layer range is bounded by the depth of
		 * the selected mip level. */
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}
	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->vk_format = pCreateInfo->format;
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;

	/* Single-aspect depth/stencil views use the matching single-aspect
	 * format. */
	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

	/* GFX9 descriptors take the base-level extent; older chips take the
	 * extent of the view's base mip. */
	if (device->physical_device->rad_info.chip_class >= GFX9) {
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width  = radv_minify(image->info.width , range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth  = radv_minify(image->info.depth , range->baseMipLevel),
		};
	}

	if (iview->vk_format != image->vk_format) {
		/* Rescale the extent from image blocks to view blocks. */
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 *              Uncompressed pixels   Compressed block sizes (4x4)
		 *      mip0:       22 x 22                   6 x 6
		 *      mip1:       11 x 11                   3 x 3
		 *      mip2:        5 x  5                   2 x 2
		 *      mip3:        2 x  2                   1 x 1
		 *      mip4:        1 x  1                   1 x 1
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
		 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
		 * divide-by-two integer math):
		 *      mip0:  6x6
		 *      mip1:  3x3
		 *      mip2:  1x1
		 *      mip3:  1x1
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by calculating the base mip's width and height, then convert that, and round it
		 * back up to get the level 0 size.
		 * Clamp the converted size between the original values, and next power of two, which
		 * means we don't oversize the image.
		 */
		 if (device->physical_device->rad_info.chip_class >= GFX9 &&
		     vk_format_is_compressed(image->vk_format) &&
		     !vk_format_is_compressed(iview->vk_format)) {
			 unsigned rounded_img_w = util_next_power_of_two(iview->extent.width);
			 unsigned rounded_img_h = util_next_power_of_two(iview->extent.height);
			 unsigned lvl_width  = radv_minify(image->info.width , range->baseMipLevel);
			 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			 /* Scale the base-mip size back up to a level-0
			  * size, then clamp as described above. */
			 lvl_width <<= range->baseMipLevel;
			 lvl_height <<= range->baseMipLevel;

			 iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w);
			 iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h);
		 }
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

	/* Build both descriptor variants up front. */
	radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, false);
	radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, true);
}
1161
1162 bool radv_layout_has_htile(const struct radv_image *image,
1163                            VkImageLayout layout,
1164                            unsigned queue_mask)
1165 {
1166         if (radv_image_has_htile(image) && image->tc_compatible_htile)
1167                 return layout != VK_IMAGE_LAYOUT_GENERAL;
1168
1169         return radv_image_has_htile(image) &&
1170                (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1171                 layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
1172                queue_mask == (1u << RADV_QUEUE_GENERAL);
1173 }
1174
1175 bool radv_layout_is_htile_compressed(const struct radv_image *image,
1176                                      VkImageLayout layout,
1177                                      unsigned queue_mask)
1178 {
1179         if (radv_image_has_htile(image) && image->tc_compatible_htile)
1180                 return layout != VK_IMAGE_LAYOUT_GENERAL;
1181
1182         return radv_image_has_htile(image) &&
1183                (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1184                 layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
1185                queue_mask == (1u << RADV_QUEUE_GENERAL);
1186 }
1187
1188 bool radv_layout_can_fast_clear(const struct radv_image *image,
1189                                 VkImageLayout layout,
1190                                 unsigned queue_mask)
1191 {
1192         return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
1193                 queue_mask == (1u << RADV_QUEUE_GENERAL);
1194 }
1195
1196 bool radv_layout_dcc_compressed(const struct radv_image *image,
1197                                 VkImageLayout layout,
1198                                 unsigned queue_mask)
1199 {
1200         /* Don't compress compute transfer dst, as image stores are not supported. */
1201         if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1202             (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1203                 return false;
1204
1205         return image->surface.num_dcc_levels > 0 && layout != VK_IMAGE_LAYOUT_GENERAL;
1206 }
1207
1208
1209 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1210 {
1211         if (!image->exclusive)
1212                 return image->queue_family_mask;
1213         if (family == VK_QUEUE_FAMILY_EXTERNAL_KHR)
1214                 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1215         if (family == VK_QUEUE_FAMILY_IGNORED)
1216                 return 1u << queue_family;
1217         return 1u << family;
1218 }
1219
1220 VkResult
1221 radv_CreateImage(VkDevice device,
1222                  const VkImageCreateInfo *pCreateInfo,
1223                  const VkAllocationCallbacks *pAllocator,
1224                  VkImage *pImage)
1225 {
1226 #ifdef ANDROID
1227         const VkNativeBufferANDROID *gralloc_info =
1228                 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
1229
1230         if (gralloc_info)
1231                 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
1232                                               pAllocator, pImage);
1233 #endif
1234
1235         const struct wsi_image_create_info *wsi_info =
1236                 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
1237         bool scanout = wsi_info && wsi_info->scanout;
1238
1239         return radv_image_create(device,
1240                                  &(struct radv_image_create_info) {
1241                                          .vk_info = pCreateInfo,
1242                                          .scanout = scanout,
1243                                  },
1244                                  pAllocator,
1245                                  pImage);
1246 }
1247
1248 void
1249 radv_DestroyImage(VkDevice _device, VkImage _image,
1250                   const VkAllocationCallbacks *pAllocator)
1251 {
1252         RADV_FROM_HANDLE(radv_device, device, _device);
1253         RADV_FROM_HANDLE(radv_image, image, _image);
1254
1255         if (!image)
1256                 return;
1257
1258         if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
1259                 device->ws->buffer_destroy(image->bo);
1260
1261         if (image->owned_memory != VK_NULL_HANDLE)
1262                 radv_FreeMemory(_device, image->owned_memory, pAllocator);
1263
1264         vk_free2(&device->alloc, pAllocator, image);
1265 }
1266
1267 void radv_GetImageSubresourceLayout(
1268         VkDevice                                    _device,
1269         VkImage                                     _image,
1270         const VkImageSubresource*                   pSubresource,
1271         VkSubresourceLayout*                        pLayout)
1272 {
1273         RADV_FROM_HANDLE(radv_image, image, _image);
1274         RADV_FROM_HANDLE(radv_device, device, _device);
1275         int level = pSubresource->mipLevel;
1276         int layer = pSubresource->arrayLayer;
1277         struct radeon_surf *surface = &image->surface;
1278
1279         if (device->physical_device->rad_info.chip_class >= GFX9) {
1280                 pLayout->offset = surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
1281                 pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
1282                 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
1283                 pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
1284                 pLayout->size = surface->u.gfx9.surf_slice_size;
1285                 if (image->type == VK_IMAGE_TYPE_3D)
1286                         pLayout->size *= u_minify(image->info.depth, level);
1287         } else {
1288                 pLayout->offset = surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
1289                 pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
1290                 pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1291                 pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1292                 pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1293                 if (image->type == VK_IMAGE_TYPE_3D)
1294                         pLayout->size *= u_minify(image->info.depth, level);
1295         }
1296 }
1297
1298
1299 VkResult
1300 radv_CreateImageView(VkDevice _device,
1301                      const VkImageViewCreateInfo *pCreateInfo,
1302                      const VkAllocationCallbacks *pAllocator,
1303                      VkImageView *pView)
1304 {
1305         RADV_FROM_HANDLE(radv_device, device, _device);
1306         struct radv_image_view *view;
1307
1308         view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
1309                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1310         if (view == NULL)
1311                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1312
1313         radv_image_view_init(view, device, pCreateInfo);
1314
1315         *pView = radv_image_view_to_handle(view);
1316
1317         return VK_SUCCESS;
1318 }
1319
1320 void
1321 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1322                       const VkAllocationCallbacks *pAllocator)
1323 {
1324         RADV_FROM_HANDLE(radv_device, device, _device);
1325         RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1326
1327         if (!iview)
1328                 return;
1329         vk_free2(&device->alloc, pAllocator, iview);
1330 }
1331
1332 void radv_buffer_view_init(struct radv_buffer_view *view,
1333                            struct radv_device *device,
1334                            const VkBufferViewCreateInfo* pCreateInfo)
1335 {
1336         RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1337
1338         view->bo = buffer->bo;
1339         view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1340                 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1341         view->vk_format = pCreateInfo->format;
1342
1343         radv_make_buffer_descriptor(device, buffer, view->vk_format,
1344                                     pCreateInfo->offset, view->range, view->state);
1345 }
1346
1347 VkResult
1348 radv_CreateBufferView(VkDevice _device,
1349                       const VkBufferViewCreateInfo *pCreateInfo,
1350                       const VkAllocationCallbacks *pAllocator,
1351                       VkBufferView *pView)
1352 {
1353         RADV_FROM_HANDLE(radv_device, device, _device);
1354         struct radv_buffer_view *view;
1355
1356         view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
1357                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1358         if (!view)
1359                 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1360
1361         radv_buffer_view_init(view, device, pCreateInfo);
1362
1363         *pView = radv_buffer_view_to_handle(view);
1364
1365         return VK_SUCCESS;
1366 }
1367
1368 void
1369 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1370                        const VkAllocationCallbacks *pAllocator)
1371 {
1372         RADV_FROM_HANDLE(radv_device, device, _device);
1373         RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1374
1375         if (!view)
1376                 return;
1377
1378         vk_free2(&device->alloc, pAllocator, view);
1379 }