OSDN Git Service

cros_gralloc: Add perform action for getting the modifier
[android-x86/external-minigbm.git] / i915.c
1 /*
2  * Copyright 2014 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6
7 #ifdef DRV_I915
8
9 #include <assert.h>
10 #include <errno.h>
11 #include <i915_drm.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <sys/mman.h>
16 #include <unistd.h>
17 #include <xf86drm.h>
18
19 #include "drv_priv.h"
20 #include "helpers.h"
21 #include "util.h"
22
/*
 * Cache line geometry used by i915_clflush(): the line size in bytes and the
 * mask selecting the byte offset within a line.
 */
enum {
        I915_CACHELINE_SIZE = 64,
        I915_CACHELINE_MASK = I915_CACHELINE_SIZE - 1,
};
25
26 static const uint32_t scanout_render_formats[] = { DRM_FORMAT_ABGR2101010, DRM_FORMAT_ABGR8888,
27                                                    DRM_FORMAT_ARGB2101010, DRM_FORMAT_ARGB8888,
28                                                    DRM_FORMAT_RGB565,      DRM_FORMAT_XBGR2101010,
29                                                    DRM_FORMAT_XBGR8888,    DRM_FORMAT_XRGB2101010,
30                                                    DRM_FORMAT_XRGB8888 };
31
32 static const uint32_t render_formats[] = { DRM_FORMAT_ABGR16161616F };
33
34 static const uint32_t texture_only_formats[] = { DRM_FORMAT_R8, DRM_FORMAT_NV12, DRM_FORMAT_P010,
35                                                  DRM_FORMAT_YVU420, DRM_FORMAT_YVU420_ANDROID };
36
/* Backend-private state; i915_init() allocates it and stores it in drv->priv. */
struct i915_device {
        /* Hardware generation as classified by i915_get_gen() (3 or 4). */
        uint32_t gen;
        /* Value written by the I915_PARAM_HAS_LLC query; tested by i915_bo_flush(). */
        int32_t has_llc;
};
41
/*
 * Classifies a chipset id into the coarse generation number used by this
 * backend.
 *
 * Returns 3 when device_id appears in the gen3 id table below, and 4 for
 * every other id.
 */
static uint32_t i915_get_gen(int device_id)
{
        const uint16_t gen3_ids[] = {
                0x2582, 0x2592, 0x2772, 0x27A2, 0x27AE, 0x29C2, 0x29B2, 0x29D2, 0xA001, 0xA011,
        };
        unsigned int idx = 0;

        while (idx < ARRAY_SIZE(gen3_ids)) {
                if (device_id == gen3_ids[idx])
                        return 3;
                idx++;
        }

        return 4;
}
53
/* Returns current_flags with every bit that is set in mask cleared. */
static uint64_t unset_flags(uint64_t current_flags, uint64_t mask)
{
        return current_flags & ~mask;
}
59
60 static int i915_add_combinations(struct driver *drv)
61 {
62         struct format_metadata metadata;
63         uint64_t render, scanout_and_render, texture_only;
64
65         scanout_and_render = BO_USE_RENDER_MASK | BO_USE_SCANOUT;
66         render = BO_USE_RENDER_MASK;
67         texture_only = BO_USE_TEXTURE_MASK;
68         uint64_t linear_mask = BO_USE_RENDERSCRIPT | BO_USE_LINEAR | BO_USE_PROTECTED |
69                                BO_USE_SW_READ_OFTEN | BO_USE_SW_WRITE_OFTEN;
70
71         metadata.tiling = I915_TILING_NONE;
72         metadata.priority = 1;
73         metadata.modifier = DRM_FORMAT_MOD_LINEAR;
74
75         drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
76                              &metadata, scanout_and_render);
77
78         drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata, render);
79
80         drv_add_combinations(drv, texture_only_formats, ARRAY_SIZE(texture_only_formats), &metadata,
81                              texture_only);
82
83         drv_modify_linear_combinations(drv);
84
85         /* NV12 format for camera, display, decoding and encoding. */
86         /* IPU3 camera ISP supports only NV12 output. */
87         drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata,
88                                BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
89                                    BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER);
90
91         /* Android CTS tests require this. */
92         drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata, BO_USE_SW_MASK);
93
94         /*
95          * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
96          * from camera and input/output from hardware decoder/encoder.
97          */
98         drv_modify_combination(drv, DRM_FORMAT_R8, &metadata,
99                                BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
100                                    BO_USE_HW_VIDEO_ENCODER);
101
102         render = unset_flags(render, linear_mask);
103         scanout_and_render = unset_flags(scanout_and_render, linear_mask);
104
105         metadata.tiling = I915_TILING_X;
106         metadata.priority = 2;
107         metadata.modifier = I915_FORMAT_MOD_X_TILED;
108
109         drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata, render);
110         drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
111                              &metadata, scanout_and_render);
112
113         metadata.tiling = I915_TILING_Y;
114         metadata.priority = 3;
115         metadata.modifier = I915_FORMAT_MOD_Y_TILED;
116
117         scanout_and_render =
118             unset_flags(scanout_and_render, BO_USE_SW_READ_RARELY | BO_USE_SW_WRITE_RARELY);
119 /* Support y-tiled NV12 and P010 for libva */
120 #ifdef I915_SCANOUT_Y_TILED
121         drv_add_combination(drv, DRM_FORMAT_NV12, &metadata,
122                             BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT);
123 #else
124         drv_add_combination(drv, DRM_FORMAT_NV12, &metadata,
125                             BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER);
126 #endif
127         scanout_and_render = unset_flags(scanout_and_render, BO_USE_SCANOUT);
128         drv_add_combination(drv, DRM_FORMAT_P010, &metadata,
129                             BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER);
130
131         drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata, render);
132         drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
133                              &metadata, scanout_and_render);
134         return 0;
135 }
136
137 static int i915_align_dimensions(struct bo *bo, uint32_t tiling, uint32_t *stride,
138                                  uint32_t *aligned_height)
139 {
140         struct i915_device *i915 = bo->drv->priv;
141         uint32_t horizontal_alignment;
142         uint32_t vertical_alignment;
143
144         switch (tiling) {
145         default:
146         case I915_TILING_NONE:
147                 /*
148                  * The Intel GPU doesn't need any alignment in linear mode,
149                  * but libva requires the allocation stride to be aligned to
150                  * 16 bytes and height to 4 rows. Further, we round up the
151                  * horizontal alignment so that row start on a cache line (64
152                  * bytes).
153                  */
154                 horizontal_alignment = 64;
155                 vertical_alignment = 4;
156                 break;
157
158         case I915_TILING_X:
159                 horizontal_alignment = 512;
160                 vertical_alignment = 8;
161                 break;
162
163         case I915_TILING_Y:
164                 if (i915->gen == 3) {
165                         horizontal_alignment = 512;
166                         vertical_alignment = 8;
167                 } else {
168                         horizontal_alignment = 128;
169                         vertical_alignment = 32;
170                 }
171                 break;
172         }
173
174         *aligned_height = ALIGN(*aligned_height, vertical_alignment);
175         if (i915->gen > 3) {
176                 *stride = ALIGN(*stride, horizontal_alignment);
177         } else {
178                 while (*stride > horizontal_alignment)
179                         horizontal_alignment <<= 1;
180
181                 *stride = horizontal_alignment;
182         }
183
184         if (i915->gen <= 3 && *stride > 8192)
185                 return -EINVAL;
186
187         return 0;
188 }
189
190 static void i915_clflush(void *start, size_t size)
191 {
192         void *p = (void *)(((uintptr_t)start) & ~I915_CACHELINE_MASK);
193         void *end = (void *)((uintptr_t)start + size);
194
195         __builtin_ia32_mfence();
196         while (p < end) {
197                 __builtin_ia32_clflush(p);
198                 p = (void *)((uintptr_t)p + I915_CACHELINE_SIZE);
199         }
200 }
201
202 static int i915_init(struct driver *drv)
203 {
204         int ret;
205         int device_id;
206         struct i915_device *i915;
207         drm_i915_getparam_t get_param;
208
209         i915 = calloc(1, sizeof(*i915));
210         if (!i915)
211                 return -ENOMEM;
212
213         memset(&get_param, 0, sizeof(get_param));
214         get_param.param = I915_PARAM_CHIPSET_ID;
215         get_param.value = &device_id;
216         ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
217         if (ret) {
218                 drv_log("Failed to get I915_PARAM_CHIPSET_ID\n");
219                 free(i915);
220                 return -EINVAL;
221         }
222
223         i915->gen = i915_get_gen(device_id);
224
225         memset(&get_param, 0, sizeof(get_param));
226         get_param.param = I915_PARAM_HAS_LLC;
227         get_param.value = &i915->has_llc;
228         ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
229         if (ret) {
230                 drv_log("Failed to get I915_PARAM_HAS_LLC\n");
231                 free(i915);
232                 return -EINVAL;
233         }
234
235         drv->priv = i915;
236
237         return i915_add_combinations(drv);
238 }
239
240 static int i915_bo_from_format(struct bo *bo, uint32_t width, uint32_t height, uint32_t format)
241 {
242         uint32_t offset;
243         size_t plane;
244         int ret, pagesize;
245
246         offset = 0;
247         pagesize = getpagesize();
248         for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
249                 uint32_t stride = drv_stride_from_format(format, width, plane);
250                 uint32_t plane_height = drv_height_from_format(format, height, plane);
251
252                 if (bo->meta.tiling != I915_TILING_NONE)
253                         assert(IS_ALIGNED(offset, pagesize));
254
255                 ret = i915_align_dimensions(bo, bo->meta.tiling, &stride, &plane_height);
256                 if (ret)
257                         return ret;
258
259                 bo->meta.strides[plane] = stride;
260                 bo->meta.sizes[plane] = stride * plane_height;
261                 bo->meta.offsets[plane] = offset;
262                 offset += bo->meta.sizes[plane];
263         }
264
265         bo->meta.total_size = ALIGN(offset, pagesize);
266
267         return 0;
268 }
269
270 static int i915_bo_compute_metadata(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
271                                     uint64_t use_flags, const uint64_t *modifiers, uint32_t count)
272 {
273         static const uint64_t modifier_order[] = {
274                 I915_FORMAT_MOD_Y_TILED,
275                 I915_FORMAT_MOD_X_TILED,
276                 DRM_FORMAT_MOD_LINEAR,
277         };
278         uint64_t modifier;
279         struct i915_device *i915 = bo->drv->priv;
280         bool huge_bo = (i915->gen <= 11) && (width > 4096);
281
282         if (modifiers) {
283                 modifier =
284                     drv_pick_modifier(modifiers, count, modifier_order, ARRAY_SIZE(modifier_order));
285         } else {
286                 struct combination *combo = drv_get_combination(bo->drv, format, use_flags);
287                 if (!combo)
288                         return -EINVAL;
289                 modifier = combo->metadata.modifier;
290         }
291
292         /*
293          * i915 only supports linear/x-tiled above 4096 wide
294          */
295         if (huge_bo && modifier != I915_FORMAT_MOD_X_TILED && modifier != DRM_FORMAT_MOD_LINEAR) {
296                 uint32_t i;
297                 for (i = 0; modifiers && i < count; i++) {
298                         if (modifiers[i] == I915_FORMAT_MOD_X_TILED)
299                                 break;
300                 }
301                 if (i == count)
302                         modifier = DRM_FORMAT_MOD_LINEAR;
303                 else
304                         modifier = I915_FORMAT_MOD_X_TILED;
305         }
306
307         switch (modifier) {
308         case DRM_FORMAT_MOD_LINEAR:
309                 bo->meta.tiling = I915_TILING_NONE;
310                 break;
311         case I915_FORMAT_MOD_X_TILED:
312                 bo->meta.tiling = I915_TILING_X;
313                 break;
314         case I915_FORMAT_MOD_Y_TILED:
315         case I915_FORMAT_MOD_Y_TILED_CCS:
316                 bo->meta.tiling = I915_TILING_Y;
317                 break;
318         }
319
320         bo->meta.format_modifiers[0] = modifier;
321
322         if (format == DRM_FORMAT_YVU420_ANDROID) {
323                 /*
324                  * We only need to be able to use this as a linear texture,
325                  * which doesn't put any HW restrictions on how we lay it
326                  * out. The Android format does require the stride to be a
327                  * multiple of 16 and expects the Cr and Cb stride to be
328                  * ALIGN(Y_stride / 2, 16), which we can make happen by
329                  * aligning to 32 bytes here.
330                  */
331                 uint32_t stride = ALIGN(width, 32);
332                 drv_bo_from_format(bo, stride, height, format);
333         } else if (modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
334                 /*
335                  * For compressed surfaces, we need a color control surface
336                  * (CCS). Color compression is only supported for Y tiled
337                  * surfaces, and for each 32x16 tiles in the main surface we
338                  * need a tile in the control surface.  Y tiles are 128 bytes
339                  * wide and 32 lines tall and we use that to first compute the
340                  * width and height in tiles of the main surface. stride and
341                  * height are already multiples of 128 and 32, respectively:
342                  */
343                 uint32_t stride = drv_stride_from_format(format, width, 0);
344                 uint32_t width_in_tiles = DIV_ROUND_UP(stride, 128);
345                 uint32_t height_in_tiles = DIV_ROUND_UP(height, 32);
346                 uint32_t size = width_in_tiles * height_in_tiles * 4096;
347                 uint32_t offset = 0;
348
349                 bo->meta.strides[0] = width_in_tiles * 128;
350                 bo->meta.sizes[0] = size;
351                 bo->meta.offsets[0] = offset;
352                 offset += size;
353
354                 /*
355                  * Now, compute the width and height in tiles of the control
356                  * surface by dividing and rounding up.
357                  */
358                 uint32_t ccs_width_in_tiles = DIV_ROUND_UP(width_in_tiles, 32);
359                 uint32_t ccs_height_in_tiles = DIV_ROUND_UP(height_in_tiles, 16);
360                 uint32_t ccs_size = ccs_width_in_tiles * ccs_height_in_tiles * 4096;
361
362                 /*
363                  * With stride and height aligned to y tiles, offset is
364                  * already a multiple of 4096, which is the required alignment
365                  * of the CCS.
366                  */
367                 bo->meta.strides[1] = ccs_width_in_tiles * 128;
368                 bo->meta.sizes[1] = ccs_size;
369                 bo->meta.offsets[1] = offset;
370                 offset += ccs_size;
371
372                 bo->meta.num_planes = 2;
373                 bo->meta.total_size = offset;
374         } else {
375                 i915_bo_from_format(bo, width, height, format);
376         }
377         return 0;
378 }
379
380 static int i915_bo_create_from_metadata(struct bo *bo)
381 {
382         int ret;
383         size_t plane;
384         struct drm_i915_gem_create gem_create;
385         struct drm_i915_gem_set_tiling gem_set_tiling;
386
387         memset(&gem_create, 0, sizeof(gem_create));
388         gem_create.size = bo->meta.total_size;
389
390         ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
391         if (ret) {
392                 drv_log("DRM_IOCTL_I915_GEM_CREATE failed (size=%llu)\n", gem_create.size);
393                 return -errno;
394         }
395
396         for (plane = 0; plane < bo->meta.num_planes; plane++)
397                 bo->handles[plane].u32 = gem_create.handle;
398
399         memset(&gem_set_tiling, 0, sizeof(gem_set_tiling));
400         gem_set_tiling.handle = bo->handles[0].u32;
401         gem_set_tiling.tiling_mode = bo->meta.tiling;
402         gem_set_tiling.stride = bo->meta.strides[0];
403
404         ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_TILING, &gem_set_tiling);
405         if (ret) {
406                 struct drm_gem_close gem_close;
407                 memset(&gem_close, 0, sizeof(gem_close));
408                 gem_close.handle = bo->handles[0].u32;
409                 drmIoctl(bo->drv->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
410
411                 drv_log("DRM_IOCTL_I915_GEM_SET_TILING failed with %d\n", errno);
412                 return -errno;
413         }
414
415         return 0;
416 }
417
418 static void i915_close(struct driver *drv)
419 {
420         free(drv->priv);
421         drv->priv = NULL;
422 }
423
424 static int i915_bo_import(struct bo *bo, struct drv_import_fd_data *data)
425 {
426         int ret;
427         struct drm_i915_gem_get_tiling gem_get_tiling;
428
429         ret = drv_prime_bo_import(bo, data);
430         if (ret)
431                 return ret;
432
433         /* TODO(gsingh): export modifiers and get rid of backdoor tiling. */
434         memset(&gem_get_tiling, 0, sizeof(gem_get_tiling));
435         gem_get_tiling.handle = bo->handles[0].u32;
436
437         ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_GET_TILING, &gem_get_tiling);
438         if (ret) {
439                 drv_gem_bo_destroy(bo);
440                 drv_log("DRM_IOCTL_I915_GEM_GET_TILING failed.\n");
441                 return ret;
442         }
443
444         bo->meta.tiling = gem_get_tiling.tiling_mode;
445         return 0;
446 }
447
448 static void *i915_bo_map(struct bo *bo, struct vma *vma, size_t plane, uint32_t map_flags)
449 {
450         int ret;
451         void *addr;
452
453         if (bo->meta.format_modifiers[0] == I915_FORMAT_MOD_Y_TILED_CCS)
454                 return MAP_FAILED;
455
456         if (bo->meta.tiling == I915_TILING_NONE) {
457                 struct drm_i915_gem_mmap gem_map;
458                 memset(&gem_map, 0, sizeof(gem_map));
459
460                 /* TODO(b/118799155): We don't seem to have a good way to
461                  * detect the use cases for which WC mapping is really needed.
462                  * The current heuristic seems overly coarse and may be slowing
463                  * down some other use cases unnecessarily.
464                  *
465                  * For now, care must be taken not to use WC mappings for
466                  * Renderscript and camera use cases, as they're
467                  * performance-sensitive. */
468                 if ((bo->meta.use_flags & BO_USE_SCANOUT) &&
469                     !(bo->meta.use_flags &
470                       (BO_USE_RENDERSCRIPT | BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE)))
471                         gem_map.flags = I915_MMAP_WC;
472
473                 gem_map.handle = bo->handles[0].u32;
474                 gem_map.offset = 0;
475                 gem_map.size = bo->meta.total_size;
476
477                 ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_map);
478                 if (ret) {
479                         drv_log("DRM_IOCTL_I915_GEM_MMAP failed\n");
480                         return MAP_FAILED;
481                 }
482
483                 addr = (void *)(uintptr_t)gem_map.addr_ptr;
484         } else {
485                 struct drm_i915_gem_mmap_gtt gem_map;
486                 memset(&gem_map, 0, sizeof(gem_map));
487
488                 gem_map.handle = bo->handles[0].u32;
489
490                 ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gem_map);
491                 if (ret) {
492                         drv_log("DRM_IOCTL_I915_GEM_MMAP_GTT failed\n");
493                         return MAP_FAILED;
494                 }
495
496                 addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags), MAP_SHARED,
497                             bo->drv->fd, gem_map.offset);
498         }
499
500         if (addr == MAP_FAILED) {
501                 drv_log("i915 GEM mmap failed\n");
502                 return addr;
503         }
504
505         vma->length = bo->meta.total_size;
506         return addr;
507 }
508
509 static int i915_bo_invalidate(struct bo *bo, struct mapping *mapping)
510 {
511         int ret;
512         struct drm_i915_gem_set_domain set_domain;
513
514         memset(&set_domain, 0, sizeof(set_domain));
515         set_domain.handle = bo->handles[0].u32;
516         if (bo->meta.tiling == I915_TILING_NONE) {
517                 set_domain.read_domains = I915_GEM_DOMAIN_CPU;
518                 if (mapping->vma->map_flags & BO_MAP_WRITE)
519                         set_domain.write_domain = I915_GEM_DOMAIN_CPU;
520         } else {
521                 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
522                 if (mapping->vma->map_flags & BO_MAP_WRITE)
523                         set_domain.write_domain = I915_GEM_DOMAIN_GTT;
524         }
525
526         ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
527         if (ret) {
528                 drv_log("DRM_IOCTL_I915_GEM_SET_DOMAIN with %d\n", ret);
529                 return ret;
530         }
531
532         return 0;
533 }
534
535 static int i915_bo_flush(struct bo *bo, struct mapping *mapping)
536 {
537         struct i915_device *i915 = bo->drv->priv;
538         if (!i915->has_llc && bo->meta.tiling == I915_TILING_NONE)
539                 i915_clflush(mapping->vma->addr, mapping->vma->length);
540
541         return 0;
542 }
543
544 static uint32_t i915_resolve_format(struct driver *drv, uint32_t format, uint64_t use_flags)
545 {
546         switch (format) {
547         case DRM_FORMAT_FLEX_IMPLEMENTATION_DEFINED:
548                 /* KBL camera subsystem requires NV12. */
549                 if (use_flags & (BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE))
550                         return DRM_FORMAT_NV12;
551                 /*HACK: See b/28671744 */
552                 return DRM_FORMAT_XBGR8888;
553         case DRM_FORMAT_FLEX_YCbCr_420_888:
554                 /*
555                  * KBL camera subsystem requires NV12. Our other use cases
556                  * don't care:
557                  * - Hardware video supports NV12,
558                  * - USB Camera HALv3 supports NV12,
559                  * - USB Camera HALv1 doesn't use this format.
560                  * Moreover, NV12 is preferred for video, due to overlay
561                  * support on SKL+.
562                  */
563                 return DRM_FORMAT_NV12;
564         default:
565                 return format;
566         }
567 }
568
569 const struct backend backend_i915 = {
570         .name = "i915",
571         .init = i915_init,
572         .close = i915_close,
573         .bo_compute_metadata = i915_bo_compute_metadata,
574         .bo_create_from_metadata = i915_bo_create_from_metadata,
575         .bo_destroy = drv_gem_bo_destroy,
576         .bo_import = i915_bo_import,
577         .bo_map = i915_bo_map,
578         .bo_unmap = drv_bo_munmap,
579         .bo_invalidate = i915_bo_invalidate,
580         .bo_flush = i915_bo_flush,
581         .resolve_format = i915_resolve_format,
582 };
583
584 #endif