minigbm:amdgpu: align stride to 256

[android-x86/external-minigbm.git] / amdgpu.c
diff --git a/amdgpu.c b/amdgpu.c

index d44d6d9..38ae2fc 100644 (file)
--- a/amdgpu.c
+++ b/amdgpu.c
@@ -134,20 +134,38 @@ static int amdgpu_create_bo(struct bo *bo, uint32_t width, uint32_t height, uint
         uint32_t plane, stride;
         struct combination *combo;
         union drm_amdgpu_gem_create gem_create;
-       struct amdgpu_priv *priv = bo->drv->priv;
  
         combo = drv_get_combination(bo->drv, format, use_flags);
         if (!combo)
                 return -EINVAL;
  
-       if (combo->metadata.tiling == TILE_TYPE_DRI)
+       if (combo->metadata.tiling == TILE_TYPE_DRI) {
+               bool needs_alignment = false;
+#ifdef __ANDROID__
+               /*
+                * Currently, the gralloc API doesn't differentiate between allocation time and map
+                * time strides. A workaround for amdgpu DRI buffers is to always align the stride to
+                * 256 bytes at allocation time.
+                *
+                * See b/115946221,b/117942643
+                */
+               if (use_flags & (BO_USE_SW_MASK))
+                       needs_alignment = true;
+#endif
+               // See b/122049612
+               if (use_flags & (BO_USE_SCANOUT))
+                       needs_alignment = true;
+
+               if (needs_alignment) {
+                       uint32_t bytes_per_pixel = drv_bytes_per_pixel_from_format(format, 0);
+                       width = ALIGN(width, 256 / bytes_per_pixel);
+               }
+
                 return dri_bo_create(bo, width, height, format, use_flags);
+       }
  
         stride = drv_stride_from_format(format, width, 0);
-       if (format == DRM_FORMAT_YVU420_ANDROID)
-               stride = ALIGN(stride, 128);
-       else
-               stride = ALIGN(stride, 64);
+       stride = ALIGN(stride,256);
  
         drv_bo_from_format(bo, stride, height, format);
  
@@ -163,12 +181,6 @@ static int amdgpu_create_bo(struct bo *bo, uint32_t width, uint32_t height, uint
         if (!(use_flags & (BO_USE_SW_READ_OFTEN | BO_USE_SCANOUT)))
                 gem_create.in.domain_flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
  
-       /* If drm_version >= 21 everything exposes explicit synchronization primitives
-          and chromeos/arc++ will use them. Disable implicit synchronization. */
-       if (priv->drm_version >= 21) {
-               gem_create.in.domain_flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
-       }
-
         /* Allocate the buffer with the preferred heap. */
         ret = drmCommandWriteRead(drv_get_fd(bo->drv), DRM_AMDGPU_GEM_CREATE, &gem_create,
                                   sizeof(gem_create));
@@ -233,6 +245,32 @@ static int amdgpu_unmap_bo(struct bo *bo, struct vma *vma)
                 return munmap(vma->addr, vma->length);
  }
  
+static int amdgpu_bo_invalidate(struct bo *bo, struct mapping *mapping)
+{ /* Waits for the GEM object in bo->handles[0] to go idle; `mapping` is not used here. */
+       int ret;
+       union drm_amdgpu_gem_wait_idle wait_idle;
+
+       if (bo->priv) /* NOTE(review): presumably set only for DRI-allocated BOs -- confirm. */
+               return 0; /* Reports success without issuing the wait ioctl. */
+
+       memset(&wait_idle, 0, sizeof(wait_idle)); /* Zero every field before filling the request. */
+       wait_idle.in.handle = bo->handles[0].u32;
+       wait_idle.in.timeout = AMDGPU_TIMEOUT_INFINITE; /* No deadline on the wait. */
+
+       ret = drmCommandWriteRead(bo->drv->fd, DRM_AMDGPU_GEM_WAIT_IDLE, &wait_idle,
+                                 sizeof(wait_idle));
+
+       if (ret < 0) { /* Command failed: log the code and hand it back to the caller. */
+               drv_log("DRM_AMDGPU_GEM_WAIT_IDLE failed with %d\n", ret);
+               return ret;
+       }
+
+       if (ret == 0 && wait_idle.out.status) /* Non-zero out.status is only logged, not returned. */
+               drv_log("DRM_AMDGPU_GEM_WAIT_IDLE BO is busy\n");
+
+       return 0;
+}
+
  static uint32_t amdgpu_resolve_format(uint32_t format, uint64_t use_flags)
  {
         switch (format) {
@@ -258,6 +296,7 @@ const struct backend backend_amdgpu = {
         .bo_import = amdgpu_import_bo,
         .bo_map = amdgpu_map_bo,
         .bo_unmap = amdgpu_unmap_bo,
+       .bo_invalidate = amdgpu_bo_invalidate,
         .resolve_format = amdgpu_resolve_format,
  };