Revert "Revert "amdgpu: Set USWC for SCANOUT images.""
[android-x86/external-minigbm.git] / amdgpu.c
/*
 * Copyright 2016 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
#ifdef DRV_AMDGPU
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <xf86drm.h>

#include "dri.h"
#include "drv_priv.h"
#include "helpers.h"
#include "util.h"

// clang-format off
#define DRI_PATH STRINGIZE(DRI_DRIVER_DIR/radeonsi_dri.so)
// clang-format on

#define TILE_TYPE_LINEAR 0
/* DRI backend decides tiling in this case. */
#define TILE_TYPE_DRI 1

struct amdgpu_priv {
        struct dri_driver dri;
        int drm_version;

        /* sdma */
        struct drm_amdgpu_info_device dev_info;
        uint32_t sdma_ctx;
        uint32_t sdma_cmdbuf_bo;
        uint64_t sdma_cmdbuf_addr;
        uint64_t sdma_cmdbuf_size;
        uint32_t *sdma_cmdbuf_map;
};

struct amdgpu_linear_vma_priv {
        uint32_t handle;
        uint32_t map_flags;
};

const static uint32_t render_target_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB8888,
                                                  DRM_FORMAT_RGB565, DRM_FORMAT_XBGR8888,
                                                  DRM_FORMAT_XRGB8888 };

const static uint32_t texture_source_formats[] = { DRM_FORMAT_GR88,           DRM_FORMAT_R8,
                                                   DRM_FORMAT_NV21,           DRM_FORMAT_NV12,
                                                   DRM_FORMAT_YVU420_ANDROID, DRM_FORMAT_YVU420 };

static int query_dev_info(int fd, struct drm_amdgpu_info_device *dev_info)
{
        struct drm_amdgpu_info info_args = { 0 };

        info_args.return_pointer = (uintptr_t)dev_info;
        info_args.return_size = sizeof(*dev_info);
        info_args.query = AMDGPU_INFO_DEV_INFO;

        return drmCommandWrite(fd, DRM_AMDGPU_INFO, &info_args, sizeof(info_args));
}

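/*
 * Set up a small GTT command buffer and an amdgpu context that are used to
 * stage copies of VRAM/USWC buffers with the SDMA engine at map time.
 */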
static int sdma_init(struct amdgpu_priv *priv, int fd)
{
        union drm_amdgpu_ctx ctx_args = { { 0 } };
        union drm_amdgpu_gem_create gem_create = { { 0 } };
        struct drm_amdgpu_gem_va va_args = { 0 };
        union drm_amdgpu_gem_mmap gem_map = { { 0 } };
        struct drm_gem_close gem_close = { 0 };
        int ret;

        /* Ensure we can make a submission without BO lists. */
        if (priv->drm_version < 27)
                return 0;

        /* Anything outside this range needs adjustments to the SDMA copy commands */
        if (priv->dev_info.family < AMDGPU_FAMILY_CI || priv->dev_info.family > AMDGPU_FAMILY_NV)
                return 0;

        ctx_args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;

        ret = drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &ctx_args, sizeof(ctx_args));
        if (ret < 0)
                return ret;

        priv->sdma_ctx = ctx_args.out.alloc.ctx_id;

        priv->sdma_cmdbuf_size = ALIGN(4096, priv->dev_info.virtual_address_alignment);
        gem_create.in.bo_size = priv->sdma_cmdbuf_size;
        gem_create.in.alignment = 4096;
        gem_create.in.domains = AMDGPU_GEM_DOMAIN_GTT;

        ret = drmCommandWriteRead(fd, DRM_AMDGPU_GEM_CREATE, &gem_create, sizeof(gem_create));
        if (ret < 0)
                goto fail_ctx;

        priv->sdma_cmdbuf_bo = gem_create.out.handle;

        priv->sdma_cmdbuf_addr =
            ALIGN(priv->dev_info.virtual_address_offset, priv->dev_info.virtual_address_alignment);

        /* Map the buffer into the GPU address space so we can use it from the GPU */
        va_args.handle = priv->sdma_cmdbuf_bo;
        va_args.operation = AMDGPU_VA_OP_MAP;
        va_args.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;
        va_args.va_address = priv->sdma_cmdbuf_addr;
        va_args.offset_in_bo = 0;
        va_args.map_size = priv->sdma_cmdbuf_size;

        ret = drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
        if (ret)
                goto fail_bo;

        gem_map.in.handle = priv->sdma_cmdbuf_bo;
        ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &gem_map);
        if (ret)
                goto fail_va;

        priv->sdma_cmdbuf_map = mmap(0, priv->sdma_cmdbuf_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                                     fd, gem_map.out.addr_ptr);
        if (priv->sdma_cmdbuf_map == MAP_FAILED) {
                priv->sdma_cmdbuf_map = NULL;
                ret = -ENOMEM;
                goto fail_va;
        }

        return 0;
fail_va:
        va_args.operation = AMDGPU_VA_OP_UNMAP;
        va_args.flags = 0;
        drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
fail_bo:
        gem_close.handle = priv->sdma_cmdbuf_bo;
        drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
fail_ctx:
        memset(&ctx_args, 0, sizeof(ctx_args));
        ctx_args.in.op = AMDGPU_CTX_OP_FREE_CTX;
        ctx_args.in.ctx_id = priv->sdma_ctx;
        drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &ctx_args, sizeof(ctx_args));
        return ret;
}

static void sdma_finish(struct amdgpu_priv *priv, int fd)
{
        union drm_amdgpu_ctx ctx_args = { { 0 } };
        struct drm_amdgpu_gem_va va_args = { 0 };
        struct drm_gem_close gem_close = { 0 };

        if (!priv->sdma_cmdbuf_map)
                return;

        va_args.handle = priv->sdma_cmdbuf_bo;
        va_args.operation = AMDGPU_VA_OP_UNMAP;
        va_args.flags = 0;
        va_args.va_address = priv->sdma_cmdbuf_addr;
        va_args.offset_in_bo = 0;
        va_args.map_size = priv->sdma_cmdbuf_size;
        drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));

        gem_close.handle = priv->sdma_cmdbuf_bo;
        drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);

        ctx_args.in.op = AMDGPU_CTX_OP_FREE_CTX;
        ctx_args.in.ctx_id = priv->sdma_ctx;
        drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &ctx_args, sizeof(ctx_args));
}

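/*
 * Copy |size| bytes from src_handle to dst_handle using the SDMA engine and
 * wait for the copy to complete before returning.
 */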
static int sdma_copy(struct amdgpu_priv *priv, int fd, uint32_t src_handle, uint32_t dst_handle,
                     uint64_t size)
{
        const uint64_t max_size_per_cmd = 0x3fff00;
        const uint32_t cmd_size = 7 * sizeof(uint32_t); /* 7 dwords, see loop below. */
        const uint64_t max_commands = priv->sdma_cmdbuf_size / cmd_size;
        uint64_t src_addr = priv->sdma_cmdbuf_addr + priv->sdma_cmdbuf_size;
        uint64_t dst_addr = src_addr + size;
        struct drm_amdgpu_gem_va va_args = { 0 };
        unsigned cmd = 0;
        uint64_t remaining_size = size;
        uint64_t cur_src_addr = src_addr;
        uint64_t cur_dst_addr = dst_addr;
        struct drm_amdgpu_cs_chunk_ib ib = { 0 };
        struct drm_amdgpu_cs_chunk chunks[2] = { { 0 } };
        uint64_t chunk_ptrs[2];
        union drm_amdgpu_cs cs = { { 0 } };
        struct drm_amdgpu_bo_list_in bo_list = { 0 };
        struct drm_amdgpu_bo_list_entry bo_list_entries[3] = { { 0 } };
        union drm_amdgpu_wait_cs wait_cs = { { 0 } };
        int ret = 0;

        if (size > UINT64_MAX - max_size_per_cmd ||
            DIV_ROUND_UP(size, max_size_per_cmd) > max_commands)
                return -ENOMEM;

        /* Map both buffers into the GPU address space so we can access them from the GPU. */
        va_args.handle = src_handle;
        va_args.operation = AMDGPU_VA_OP_MAP;
        va_args.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_DELAY_UPDATE;
        va_args.va_address = src_addr;
        va_args.map_size = size;

        ret = drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
        if (ret)
                return ret;

        va_args.handle = dst_handle;
        va_args.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_DELAY_UPDATE;
        va_args.va_address = dst_addr;

        ret = drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
        if (ret)
                goto unmap_src;

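        /*
         * Each loop iteration emits one 7-dword SDMA linear-copy packet:
         * header, byte count, parameter, source address (lo/hi) and
         * destination address (lo/hi).
         */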
        while (remaining_size) {
                uint64_t cur_size = remaining_size;
                if (cur_size > max_size_per_cmd)
                        cur_size = max_size_per_cmd;

                priv->sdma_cmdbuf_map[cmd++] = 0x01; /* linear copy */
                priv->sdma_cmdbuf_map[cmd++] =
                    priv->dev_info.family >= AMDGPU_FAMILY_AI ? (cur_size - 1) : cur_size;
                priv->sdma_cmdbuf_map[cmd++] = 0;
                priv->sdma_cmdbuf_map[cmd++] = cur_src_addr;
                priv->sdma_cmdbuf_map[cmd++] = cur_src_addr >> 32;
                priv->sdma_cmdbuf_map[cmd++] = cur_dst_addr;
                priv->sdma_cmdbuf_map[cmd++] = cur_dst_addr >> 32;

                remaining_size -= cur_size;
                cur_src_addr += cur_size;
                cur_dst_addr += cur_size;
        }

        ib.va_start = priv->sdma_cmdbuf_addr;
        ib.ib_bytes = cmd * 4;
        ib.ip_type = AMDGPU_HW_IP_DMA;

        chunks[1].chunk_id = AMDGPU_CHUNK_ID_IB;
        chunks[1].length_dw = sizeof(ib) / 4;
        chunks[1].chunk_data = (uintptr_t)&ib;

        bo_list_entries[0].bo_handle = priv->sdma_cmdbuf_bo;
        bo_list_entries[0].bo_priority = 8; /* Middle of range, like RADV. */
        bo_list_entries[1].bo_handle = src_handle;
        bo_list_entries[1].bo_priority = 8;
        bo_list_entries[2].bo_handle = dst_handle;
        bo_list_entries[2].bo_priority = 8;

        bo_list.bo_number = 3;
        bo_list.bo_info_size = sizeof(bo_list_entries[0]);
        bo_list.bo_info_ptr = (uintptr_t)bo_list_entries;

        chunks[0].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
        chunks[0].length_dw = sizeof(bo_list) / 4;
        chunks[0].chunk_data = (uintptr_t)&bo_list;

        chunk_ptrs[0] = (uintptr_t)&chunks[0];
        chunk_ptrs[1] = (uintptr_t)&chunks[1];

        cs.in.ctx_id = priv->sdma_ctx;
        cs.in.num_chunks = 2;
        cs.in.chunks = (uintptr_t)chunk_ptrs;

        ret = drmCommandWriteRead(fd, DRM_AMDGPU_CS, &cs, sizeof(cs));
        if (ret) {
                drv_log("SDMA copy command buffer submission failed %d\n", ret);
                goto unmap_dst;
        }

        wait_cs.in.handle = cs.out.handle;
        wait_cs.in.ip_type = AMDGPU_HW_IP_DMA;
        wait_cs.in.ctx_id = priv->sdma_ctx;
        wait_cs.in.timeout = INT64_MAX;

        ret = drmCommandWriteRead(fd, DRM_AMDGPU_WAIT_CS, &wait_cs, sizeof(wait_cs));
        if (ret) {
                drv_log("Could not wait for CS to finish\n");
        } else if (wait_cs.out.status) {
                drv_log("Infinite wait timed out, likely GPU hang.\n");
                ret = -ENODEV;
        }

unmap_dst:
        va_args.handle = dst_handle;
        va_args.operation = AMDGPU_VA_OP_UNMAP;
        va_args.flags = AMDGPU_VM_DELAY_UPDATE;
        va_args.va_address = dst_addr;
        drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));

unmap_src:
        va_args.handle = src_handle;
        va_args.operation = AMDGPU_VA_OP_UNMAP;
        va_args.flags = AMDGPU_VM_DELAY_UPDATE;
        va_args.va_address = src_addr;
        drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));

        return ret;
}

static int amdgpu_init(struct driver *drv)
{
        struct amdgpu_priv *priv;
        drmVersionPtr drm_version;
        struct format_metadata metadata;
        uint64_t use_flags = BO_USE_RENDER_MASK;

        priv = calloc(1, sizeof(struct amdgpu_priv));
        if (!priv)
                return -ENOMEM;

        drm_version = drmGetVersion(drv_get_fd(drv));
        if (!drm_version) {
                free(priv);
                return -ENODEV;
        }

        priv->drm_version = drm_version->version_minor;
        drmFreeVersion(drm_version);

        drv->priv = priv;

        if (query_dev_info(drv_get_fd(drv), &priv->dev_info)) {
                free(priv);
                drv->priv = NULL;
                return -ENODEV;
        }
        if (dri_init(drv, DRI_PATH, "radeonsi")) {
                free(priv);
                drv->priv = NULL;
                return -ENODEV;
        }

        if (sdma_init(priv, drv_get_fd(drv))) {
                drv_log("SDMA init failed\n");

                /* Continue, as we can still successfully map things without SDMA. */
        }

        metadata.tiling = TILE_TYPE_LINEAR;
        metadata.priority = 1;
        metadata.modifier = DRM_FORMAT_MOD_LINEAR;

        drv_add_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
                             &metadata, use_flags);

        drv_add_combinations(drv, texture_source_formats, ARRAY_SIZE(texture_source_formats),
                             &metadata, BO_USE_TEXTURE_MASK);

        /* NV12 format for camera, display, decoding and encoding. */
        drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata,
                               BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
                                   BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER);

        /* Android CTS tests require this. */
        drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata, BO_USE_SW_MASK);

        /* Linear formats supported by display. */
        drv_modify_combination(drv, DRM_FORMAT_ARGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
        drv_modify_combination(drv, DRM_FORMAT_XRGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
        drv_modify_combination(drv, DRM_FORMAT_ABGR8888, &metadata, BO_USE_SCANOUT);
        drv_modify_combination(drv, DRM_FORMAT_XBGR8888, &metadata, BO_USE_SCANOUT);

        drv_modify_combination(drv, DRM_FORMAT_NV21, &metadata, BO_USE_SCANOUT);

        /*
         * The R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB, for JPEG snapshots from
         * the camera, and for input/output from the hardware decoder/encoder.
         */
        drv_modify_combination(drv, DRM_FORMAT_R8, &metadata,
                               BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
                                   BO_USE_HW_VIDEO_ENCODER);

        /*
         * The following formats will be allocated by the DRI backend and may be potentially tiled.
         * Since format modifier support hasn't been implemented fully yet, it's not
         * possible to enumerate the different types of buffers (like i915 can).
         */
        use_flags &= ~BO_USE_RENDERSCRIPT;
        use_flags &= ~BO_USE_SW_WRITE_OFTEN;
        use_flags &= ~BO_USE_SW_READ_OFTEN;
        use_flags &= ~BO_USE_LINEAR;

        metadata.tiling = TILE_TYPE_DRI;
        metadata.priority = 2;

        drv_add_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
                             &metadata, use_flags);

        /* Potentially tiled formats supported by display. */
        drv_modify_combination(drv, DRM_FORMAT_ARGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
        drv_modify_combination(drv, DRM_FORMAT_XRGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
        drv_modify_combination(drv, DRM_FORMAT_ABGR8888, &metadata, BO_USE_SCANOUT);
        drv_modify_combination(drv, DRM_FORMAT_XBGR8888, &metadata, BO_USE_SCANOUT);
        return 0;
}

static void amdgpu_close(struct driver *drv)
{
        sdma_finish(drv->priv, drv_get_fd(drv));
        dri_close(drv);
        free(drv->priv);
        drv->priv = NULL;
}

static int amdgpu_create_bo_linear(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
                                   uint64_t use_flags)
{
        int ret;
        uint32_t plane, stride;
        union drm_amdgpu_gem_create gem_create;
        struct amdgpu_priv *priv = bo->drv->priv;

        stride = drv_stride_from_format(format, width, 0);
        stride = ALIGN(stride, 256);

        drv_bo_from_format(bo, stride, height, format);

        memset(&gem_create, 0, sizeof(gem_create));
        gem_create.in.bo_size =
            ALIGN(bo->meta.total_size, priv->dev_info.virtual_address_alignment);
        gem_create.in.alignment = 256;
        gem_create.in.domain_flags = 0;

        if (use_flags & (BO_USE_LINEAR | BO_USE_SW_MASK))
                gem_create.in.domain_flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

        gem_create.in.domains = AMDGPU_GEM_DOMAIN_GTT;

        /* Scanout in GTT requires USWC, otherwise try to use cacheable memory
         * for buffers that are read often, because uncacheable reads can be
         * very slow. USWC should be faster on the GPU though. */
        if ((use_flags & BO_USE_SCANOUT) || !(use_flags & BO_USE_SW_READ_OFTEN))
                gem_create.in.domain_flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;

        /* Allocate the buffer with the preferred heap. */
        ret = drmCommandWriteRead(drv_get_fd(bo->drv), DRM_AMDGPU_GEM_CREATE, &gem_create,
                                  sizeof(gem_create));
        if (ret < 0)
                return ret;

        for (plane = 0; plane < bo->meta.num_planes; plane++)
                bo->handles[plane].u32 = gem_create.out.handle;

        bo->meta.format_modifiers[0] = DRM_FORMAT_MOD_LINEAR;

        return 0;
}

static int amdgpu_create_bo(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
                            uint64_t use_flags)
{
        struct combination *combo;

        combo = drv_get_combination(bo->drv, format, use_flags);
        if (!combo)
                return -EINVAL;

        if (combo->metadata.tiling == TILE_TYPE_DRI) {
                bool needs_alignment = false;
#ifdef __ANDROID__
                /*
                 * Currently, the gralloc API doesn't differentiate between allocation time and map
                 * time strides. A workaround for amdgpu DRI buffers is to always align to 256 at
465                  * allocation time.
466                  *
467                  * See b/115946221,b/117942643
468                  */
469                 if (use_flags & (BO_USE_SW_MASK))
470                         needs_alignment = true;
471 #endif
472                 // See b/122049612
473                 if (use_flags & (BO_USE_SCANOUT))
474                         needs_alignment = true;
475
476                 if (needs_alignment) {
477                         uint32_t bytes_per_pixel = drv_bytes_per_pixel_from_format(format, 0);
478                         width = ALIGN(width, 256 / bytes_per_pixel);
479                 }
480
481                 return dri_bo_create(bo, width, height, format, use_flags);
482         }
483
484         return amdgpu_create_bo_linear(bo, width, height, format, use_flags);
485 }
486
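/*
 * If the caller only allows the linear modifier, allocate through the linear
 * GTT path; otherwise let the DRI backend pick a (potentially tiled) layout.
 */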
static int amdgpu_create_bo_with_modifiers(struct bo *bo, uint32_t width, uint32_t height,
                                           uint32_t format, const uint64_t *modifiers,
                                           uint32_t count)
{
        bool only_use_linear = true;

        for (uint32_t i = 0; i < count; ++i)
                if (modifiers[i] != DRM_FORMAT_MOD_LINEAR)
                        only_use_linear = false;

        if (only_use_linear)
                return amdgpu_create_bo_linear(bo, width, height, format, BO_USE_SCANOUT);

        return dri_bo_create_with_modifiers(bo, width, height, format, modifiers, count);
}

static int amdgpu_import_bo(struct bo *bo, struct drv_import_fd_data *data)
{
        bool dri_tiling = data->format_modifiers[0] != DRM_FORMAT_MOD_LINEAR;
        if (data->format_modifiers[0] == DRM_FORMAT_MOD_INVALID) {
                struct combination *combo;
                combo = drv_get_combination(bo->drv, data->format, data->use_flags);
                if (!combo)
                        return -EINVAL;

                dri_tiling = combo->metadata.tiling == TILE_TYPE_DRI;
        }

        if (dri_tiling)
                return dri_bo_import(bo, data);
        else
                return drv_prime_bo_import(bo, data);
}

static int amdgpu_destroy_bo(struct bo *bo)
{
        if (bo->priv)
                return dri_bo_destroy(bo);
        else
                return drv_gem_bo_destroy(bo);
}

static void *amdgpu_map_bo(struct bo *bo, struct vma *vma, size_t plane, uint32_t map_flags)
{
        void *addr = MAP_FAILED;
        int ret;
        union drm_amdgpu_gem_mmap gem_map;
        struct drm_amdgpu_gem_create_in bo_info = { 0 };
        struct drm_amdgpu_gem_op gem_op = { 0 };
        uint32_t handle = bo->handles[plane].u32;
        struct amdgpu_linear_vma_priv *priv = NULL;
        struct amdgpu_priv *drv_priv;

        if (bo->priv)
                return dri_bo_map(bo, vma, plane, map_flags);

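        /* Query the BO's creation info to learn its domains and flags. */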
        drv_priv = bo->drv->priv;
        gem_op.handle = handle;
        gem_op.op = AMDGPU_GEM_OP_GET_GEM_CREATE_INFO;
        gem_op.value = (uintptr_t)&bo_info;

        ret = drmCommandWriteRead(bo->drv->fd, DRM_AMDGPU_GEM_OP, &gem_op, sizeof(gem_op));
        if (ret)
                return MAP_FAILED;

        vma->length = bo_info.bo_size;

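        /*
         * CPU reads from VRAM or write-combined (USWC) memory are very slow, so
         * stage the contents into a cached GTT bounce buffer with SDMA and map
         * that buffer instead.
         */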
        if (((bo_info.domains & AMDGPU_GEM_DOMAIN_VRAM) ||
             (bo_info.domain_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)) &&
            drv_priv->sdma_cmdbuf_map) {
                union drm_amdgpu_gem_create gem_create = { { 0 } };

                priv = calloc(1, sizeof(struct amdgpu_linear_vma_priv));
                if (!priv)
                        return MAP_FAILED;

                gem_create.in.bo_size = bo_info.bo_size;
                gem_create.in.alignment = 4096;
                gem_create.in.domains = AMDGPU_GEM_DOMAIN_GTT;

                ret = drmCommandWriteRead(bo->drv->fd, DRM_AMDGPU_GEM_CREATE, &gem_create,
                                          sizeof(gem_create));
                if (ret < 0) {
                        drv_log("GEM create failed\n");
                        free(priv);
                        return MAP_FAILED;
                }

                priv->map_flags = map_flags;
                handle = priv->handle = gem_create.out.handle;

                ret = sdma_copy(bo->drv->priv, bo->drv->fd, bo->handles[0].u32, priv->handle,
                                bo_info.bo_size);
                if (ret) {
                        drv_log("SDMA copy for read failed\n");
                        goto fail;
                }
        }

        memset(&gem_map, 0, sizeof(gem_map));
        gem_map.in.handle = handle;

        ret = drmIoctl(bo->drv->fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &gem_map);
        if (ret) {
                drv_log("DRM_IOCTL_AMDGPU_GEM_MMAP failed\n");
                goto fail;
        }

        addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags), MAP_SHARED, bo->drv->fd,
                    gem_map.out.addr_ptr);
        if (addr == MAP_FAILED)
                goto fail;

        vma->priv = priv;
        return addr;

fail:
        if (priv) {
                struct drm_gem_close gem_close = { 0 };
                gem_close.handle = priv->handle;
                drmIoctl(bo->drv->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
                free(priv);
        }
        return MAP_FAILED;
}

static int amdgpu_unmap_bo(struct bo *bo, struct vma *vma)
{
        if (bo->priv)
                return dri_bo_unmap(bo, vma);
        else {
                int r = munmap(vma->addr, vma->length);
                if (r)
                        return r;

                if (vma->priv) {
                        struct amdgpu_linear_vma_priv *priv = vma->priv;
                        struct drm_gem_close gem_close = { 0 };

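                        /* Copy the staging BO back if the mapping was writable. */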
                        if (BO_MAP_WRITE & priv->map_flags) {
                                r = sdma_copy(bo->drv->priv, bo->drv->fd, priv->handle,
                                              bo->handles[0].u32, vma->length);
                                if (r)
                                        return r;
                        }

                        gem_close.handle = priv->handle;
                        r = drmIoctl(bo->drv->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
                }

                return 0;
        }
}

static int amdgpu_bo_invalidate(struct bo *bo, struct mapping *mapping)
{
        int ret;
        union drm_amdgpu_gem_wait_idle wait_idle;

        if (bo->priv)
                return 0;

        memset(&wait_idle, 0, sizeof(wait_idle));
        wait_idle.in.handle = bo->handles[0].u32;
        wait_idle.in.timeout = AMDGPU_TIMEOUT_INFINITE;

        ret = drmCommandWriteRead(bo->drv->fd, DRM_AMDGPU_GEM_WAIT_IDLE, &wait_idle,
                                  sizeof(wait_idle));

        if (ret < 0) {
                drv_log("DRM_AMDGPU_GEM_WAIT_IDLE failed with %d\n", ret);
                return ret;
        }

        if (ret == 0 && wait_idle.out.status)
                drv_log("DRM_AMDGPU_GEM_WAIT_IDLE BO is busy\n");

        return 0;
}

static uint32_t amdgpu_resolve_format(struct driver *drv, uint32_t format, uint64_t use_flags)
{
        switch (format) {
        case DRM_FORMAT_FLEX_IMPLEMENTATION_DEFINED:
                /* Camera subsystem requires NV12. */
                if (use_flags & (BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE))
                        return DRM_FORMAT_NV12;
                /* HACK: See b/28671744 */
                return DRM_FORMAT_XBGR8888;
        case DRM_FORMAT_FLEX_YCbCr_420_888:
                return DRM_FORMAT_NV12;
        default:
                return format;
        }
}

const struct backend backend_amdgpu = {
        .name = "amdgpu",
        .init = amdgpu_init,
        .close = amdgpu_close,
        .bo_create = amdgpu_create_bo,
        .bo_create_with_modifiers = amdgpu_create_bo_with_modifiers,
        .bo_destroy = amdgpu_destroy_bo,
        .bo_import = amdgpu_import_bo,
        .bo_map = amdgpu_map_bo,
        .bo_unmap = amdgpu_unmap_bo,
        .bo_invalidate = amdgpu_bo_invalidate,
        .resolve_format = amdgpu_resolve_format,
        .num_planes_from_modifier = dri_num_planes_from_modifier,
};

#endif