OSDN Git Service

radeonsi: fix a compute shader hang with big threadgroups on SI & CI
author Marek Olšák <marek.olsak@amd.com>
Fri, 24 Jun 2016 00:22:42 +0000 (02:22 +0200)
committer Emil Velikov <emil.l.velikov@gmail.com>
Thu, 7 Jul 2016 15:12:30 +0000 (16:12 +0100)
ported from Vulkan

Cc: 12.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
(cherry picked from commit 1e8adb0ee43062210ca54821a880ef08bfdba1b7)
[Emil Velikov: resolve trivial conflict in si_launch_grid()]
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Conflicts:
src/gallium/drivers/radeonsi/si_compute.c

src/gallium/drivers/radeonsi/si_compute.c

index 09cbe13..484b252 100644 (file)
@@ -441,6 +441,21 @@ static void si_launch_grid(
        struct si_context *sctx = (struct si_context*)ctx;
        struct si_compute *program = sctx->cs_shader_state.program;
        int i;
+       /* HW bug workaround when CS threadgroups > 256 threads and async
+        * compute isn't used, i.e. only one compute job can run at a time.
+        * If async compute is possible, the threadgroup size must be limited
+        * to 256 threads on all queues to avoid the bug.
+        * Only SI and certain CIK chips are affected.
+        */
+       bool cs_regalloc_hang =
+               (sctx->b.chip_class == SI ||
+                sctx->b.family == CHIP_BONAIRE ||
+                sctx->b.family == CHIP_KABINI) &&
+               info->block[0] * info->block[1] * info->block[2] > 256;
+
+       if (cs_regalloc_hang)
+               sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+                                SI_CONTEXT_CS_PARTIAL_FLUSH;
 
        si_decompress_compute_textures(sctx);
 
@@ -487,6 +502,9 @@ static void si_launch_grid(
        si_emit_dispatch_packets(sctx, info);
 
        si_ce_post_draw_synchronization(sctx);
+
+       if (cs_regalloc_hang)
+               sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 }