OSDN Git Service

ddebug: rewrite to always use a threaded approach
author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Sun, 22 Oct 2017 15:38:59 +0000 (17:38 +0200)
committer: Nicolai Hähnle <nicolai.haehnle@amd.com>
Thu, 9 Nov 2017 13:01:03 +0000 (14:01 +0100)
This patch has multiple goals:

1. Off-load the writing of records in 'always' mode to another thread
   for performance.

2. Allow using ddebug with threaded contexts. This really forces us to
   move some of the "after_draw" handling into another thread.

3. Simplify the different modes of ddebug, both in the code and in
   the user interface, i.e. GALLIUM_DDEBUG. In particular, there's
   no 'pipelined' anymore, since we're always pipelined; and 'noflush'
   is replaced by 'flush', since we no longer flush by default.

4. Fix the fences in pipelining mode. They previously relied on writes
   via pipe_context::clear_buffer. However, on radeonsi, those could
   (quite reasonably) end up in the SDMA buffer. So we use the newly
   added PIPE_FLUSH_{TOP,BOTTOM}_OF_PIPE fences instead.

5. Improve pipelined mode overall, using the finer grained information
   provided by the new fences.

Overall, the result is that pipelined mode should be more useful, and
using ddebug in default mode is much less invasive, in the sense that
it changes the overall driver behavior less (which is kind of crucial
for a driver debugging tool).

An example of the new hang debug output:

  Gallium debugger active.
  Hang detection timeout is 1000ms.
  GPU hang detected, collecting information...

  Draw #   driver  prev BOP  TOP  BOP  dump file
  -------------------------------------------------------------
  2          YES      YES    YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000000
  3          YES      NO     YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000001
  4          YES      NO     YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000002
  5          YES      NO     YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000003

  Done.

We can see that there were almost certainly 4 draws in flight when
the hang happened: the top-of-pipe fence was signaled for all 4 draws,
the bottom-of-pipe fence for none of them. In virtually all cases,
we'd expect the first draw in the list to be at fault, but due to the
GPU parallelism, it's possible (though highly unlikely) that one of
the later draws causes a component to get stuck in a way that prevents
the earlier draws from making progress as well.

(In the above example, there were actually only 3 draws truly in flight:
the last draw is a blit that waits for the earlier draws; however, its
top-of-pipe fence is emitted before the cache flush and wait, and so
the fact that the draw hasn't truly started yet can only be seen from a
closer inspection of GPU state.)

Acked-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/ddebug/dd_context.c
src/gallium/drivers/ddebug/dd_draw.c
src/gallium/drivers/ddebug/dd_pipe.h
src/gallium/drivers/ddebug/dd_screen.c

index 2abbff9..558708d 100644 (file)
@@ -564,30 +564,33 @@ dd_context_set_stream_output_targets(struct pipe_context *_pipe,
    pipe->set_stream_output_targets(pipe, num_targets, tgs, offsets);
 }
 
+void
+dd_thread_join(struct dd_context *dctx)
+{
+   mtx_lock(&dctx->mutex);
+   dctx->kill_thread = true;
+   cnd_signal(&dctx->cond);
+   mtx_unlock(&dctx->mutex);
+   thrd_join(dctx->thread, NULL);
+}
+
 static void
 dd_context_destroy(struct pipe_context *_pipe)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
 
-   if (dctx->thread) {
-      mtx_lock(&dctx->mutex);
-      dctx->kill_thread = 1;
-      mtx_unlock(&dctx->mutex);
-      thrd_join(dctx->thread, NULL);
-      mtx_destroy(&dctx->mutex);
-      assert(!dctx->records);
-   }
+   dd_thread_join(dctx);
+   mtx_destroy(&dctx->mutex);
+   cnd_destroy(&dctx->cond);
 
-   if (dctx->fence) {
-      pipe->transfer_unmap(pipe, dctx->fence_transfer);
-      pipe_resource_reference(&dctx->fence, NULL);
-   }
+   assert(list_empty(&dctx->records));
+   assert(!dctx->record_pending);
 
    if (pipe->set_log_context) {
       pipe->set_log_context(pipe, NULL);
 
-      if (dd_screen(dctx->base.screen)->mode == DD_DUMP_ALL_CALLS) {
+      if (dd_screen(dctx->base.screen)->dump_mode == DD_DUMP_ALL_CALLS) {
          FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen), 0);
          if (f) {
             fprintf(f, "Remainder of driver log:\n\n");
@@ -921,39 +924,19 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
 
    dctx->draw_state.sample_mask = ~0;
 
-   if (dscreen->mode == DD_DETECT_HANGS_PIPELINED) {
-      dctx->fence = pipe_buffer_create(dscreen->screen, PIPE_BIND_CUSTOM,
-                                            PIPE_USAGE_STAGING, 4);
-      if (!dctx->fence)
-         goto fail;
-
-      dctx->mapped_fence = pipe_buffer_map(pipe, dctx->fence,
-                                           PIPE_TRANSFER_READ_WRITE |
-                                           PIPE_TRANSFER_PERSISTENT |
-                                           PIPE_TRANSFER_COHERENT,
-                                           &dctx->fence_transfer);
-      if (!dctx->mapped_fence)
-         goto fail;
-
-      *dctx->mapped_fence = 0;
-
-      (void) mtx_init(&dctx->mutex, mtx_plain);
-      dctx->thread = u_thread_create(dd_thread_pipelined_hang_detect, dctx);
-      if (!dctx->thread) {
-         mtx_destroy(&dctx->mutex);
-         goto fail;
-      }
+   list_inithead(&dctx->records);
+   (void) mtx_init(&dctx->mutex, mtx_plain);
+   (void) cnd_init(&dctx->cond);
+   dctx->thread = u_thread_create(dd_thread_main, dctx);
+   if (!dctx->thread) {
+      mtx_destroy(&dctx->mutex);
+      goto fail;
    }
 
    return &dctx->base;
 
 fail:
-   if (dctx) {
-      if (dctx->mapped_fence)
-         pipe_transfer_unmap(pipe, dctx->fence_transfer);
-      pipe_resource_reference(&dctx->fence, NULL);
-      FREE(dctx);
-   }
+   FREE(dctx);
    pipe->destroy(pipe);
    return NULL;
 }
index a15801b..99c9c92 100644 (file)
 #include <inttypes.h>
 
 
-FILE *
-dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number)
+static void
+dd_write_header(FILE *f, struct pipe_screen *screen, unsigned apitrace_call_number)
 {
-   struct pipe_screen *screen = dscreen->screen;
    char cmd_line[4096];
-
-   FILE *f = dd_get_debug_file(dscreen->verbose);
-   if (!f)
-      return NULL;
-
    if (os_get_command_line(cmd_line, sizeof(cmd_line)))
       fprintf(f, "Command: %s\n", cmd_line);
    fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
@@ -56,8 +50,19 @@ dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number)
    fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
 
    if (apitrace_call_number)
-      fprintf(f, "Last apitrace call: %u\n\n",
-              apitrace_call_number);
+      fprintf(f, "Last apitrace call: %u\n\n", apitrace_call_number);
+}
+
+FILE *
+dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number)
+{
+   struct pipe_screen *screen = dscreen->screen;
+
+   FILE *f = dd_get_debug_file(dscreen->verbose);
+   if (!f)
+      return NULL;
+
+   dd_write_header(f, screen, apitrace_call_number);
    return f;
 }
 
@@ -77,12 +82,6 @@ dd_dump_dmesg(FILE *f)
    pclose(p);
 }
 
-static void
-dd_close_file_stream(FILE *f)
-{
-   fclose(f);
-}
-
 static unsigned
 dd_num_active_viewports(struct dd_draw_state *dstate)
 {
@@ -550,29 +549,6 @@ dd_dump_call(FILE *f, struct dd_draw_state *state, struct dd_call *call)
 }
 
 static void
-dd_write_report(struct dd_context *dctx, struct dd_call *call, unsigned flags,
-                bool dump_dmesg)
-{
-   FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
-                                dctx->draw_state.apitrace_call_number);
-
-   if (!f)
-      return;
-
-   dd_dump_call(f, &dctx->draw_state, call);
-   dd_dump_driver_state(dctx, f, flags);
-
-   fprintf(f,"\n\n**************************************************"
-             "***************************\n");
-   fprintf(f, "Context Log:\n\n");
-   u_log_new_page_print(&dctx->log, f);
-
-   if (dump_dmesg)
-      dd_dump_dmesg(f);
-   dd_close_file_stream(f);
-}
-
-static void
 dd_kill_process(void)
 {
    sync();
@@ -582,54 +558,6 @@ dd_kill_process(void)
    exit(1);
 }
 
-static bool
-dd_flush_and_check_hang(struct dd_context *dctx,
-                        struct pipe_fence_handle **flush_fence,
-                        unsigned flush_flags)
-{
-   struct pipe_fence_handle *fence = NULL;
-   struct pipe_context *pipe = dctx->pipe;
-   struct pipe_screen *screen = pipe->screen;
-   uint64_t timeout_ms = dd_screen(dctx->base.screen)->timeout_ms;
-   bool idle;
-
-   assert(timeout_ms > 0);
-
-   pipe->flush(pipe, &fence, flush_flags);
-   if (flush_fence)
-      screen->fence_reference(screen, flush_fence, fence);
-   if (!fence)
-      return false;
-
-   idle = screen->fence_finish(screen, pipe, fence, timeout_ms * 1000000);
-   screen->fence_reference(screen, &fence, NULL);
-   if (!idle)
-      fprintf(stderr, "dd: GPU hang detected!\n");
-   return !idle;
-}
-
-static void
-dd_flush_and_handle_hang(struct dd_context *dctx,
-                         struct pipe_fence_handle **fence, unsigned flags,
-                         const char *cause)
-{
-   if (dd_flush_and_check_hang(dctx, fence, flags)) {
-      FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
-                                   dctx->draw_state.apitrace_call_number);
-
-      if (f) {
-         fprintf(f, "dd: %s.\n", cause);
-         dd_dump_driver_state(dctx, f,
-                              PIPE_DUMP_DEVICE_STATUS_REGISTERS);
-         dd_dump_dmesg(f);
-         dd_close_file_stream(f);
-      }
-
-      /* Terminate the process to prevent future hangs. */
-      dd_kill_process();
-   }
-}
-
 static void
 dd_unreference_copy_of_call(struct dd_call *dst)
 {
@@ -679,89 +607,6 @@ dd_unreference_copy_of_call(struct dd_call *dst)
 }
 
 static void
-dd_copy_call(struct dd_call *dst, struct dd_call *src)
-{
-   dst->type = src->type;
-
-   switch (src->type) {
-   case CALL_DRAW_VBO:
-      pipe_so_target_reference(&dst->info.draw_vbo.draw.count_from_stream_output,
-                               src->info.draw_vbo.draw.count_from_stream_output);
-      pipe_resource_reference(&dst->info.draw_vbo.indirect.buffer,
-                              src->info.draw_vbo.indirect.buffer);
-      pipe_resource_reference(&dst->info.draw_vbo.indirect.indirect_draw_count,
-                              src->info.draw_vbo.indirect.indirect_draw_count);
-
-      if (dst->info.draw_vbo.draw.index_size &&
-          !dst->info.draw_vbo.draw.has_user_indices)
-         pipe_resource_reference(&dst->info.draw_vbo.draw.index.resource, NULL);
-      else
-         dst->info.draw_vbo.draw.index.user = NULL;
-
-      if (src->info.draw_vbo.draw.index_size &&
-          !src->info.draw_vbo.draw.has_user_indices) {
-         pipe_resource_reference(&dst->info.draw_vbo.draw.index.resource,
-                                 src->info.draw_vbo.draw.index.resource);
-      }
-
-      dst->info.draw_vbo = src->info.draw_vbo;
-      if (!src->info.draw_vbo.draw.indirect)
-         dst->info.draw_vbo.draw.indirect = NULL;
-      else
-         dst->info.draw_vbo.draw.indirect = &dst->info.draw_vbo.indirect;
-      break;
-   case CALL_LAUNCH_GRID:
-      pipe_resource_reference(&dst->info.launch_grid.indirect,
-                              src->info.launch_grid.indirect);
-      dst->info.launch_grid = src->info.launch_grid;
-      break;
-   case CALL_RESOURCE_COPY_REGION:
-      pipe_resource_reference(&dst->info.resource_copy_region.dst,
-                              src->info.resource_copy_region.dst);
-      pipe_resource_reference(&dst->info.resource_copy_region.src,
-                              src->info.resource_copy_region.src);
-      dst->info.resource_copy_region = src->info.resource_copy_region;
-      break;
-   case CALL_BLIT:
-      pipe_resource_reference(&dst->info.blit.dst.resource,
-                              src->info.blit.dst.resource);
-      pipe_resource_reference(&dst->info.blit.src.resource,
-                              src->info.blit.src.resource);
-      dst->info.blit = src->info.blit;
-      break;
-   case CALL_FLUSH_RESOURCE:
-      pipe_resource_reference(&dst->info.flush_resource,
-                              src->info.flush_resource);
-      break;
-   case CALL_CLEAR:
-      dst->info.clear = src->info.clear;
-      break;
-   case CALL_CLEAR_BUFFER:
-      pipe_resource_reference(&dst->info.clear_buffer.res,
-                              src->info.clear_buffer.res);
-      dst->info.clear_buffer = src->info.clear_buffer;
-      break;
-   case CALL_CLEAR_TEXTURE:
-      break;
-   case CALL_CLEAR_RENDER_TARGET:
-      break;
-   case CALL_CLEAR_DEPTH_STENCIL:
-      break;
-   case CALL_GENERATE_MIPMAP:
-      pipe_resource_reference(&dst->info.generate_mipmap.res,
-                              src->info.generate_mipmap.res);
-      dst->info.generate_mipmap = src->info.generate_mipmap;
-      break;
-   case CALL_GET_QUERY_RESULT_RESOURCE:
-      pipe_resource_reference(&dst->info.get_query_result_resource.resource,
-                              src->info.get_query_result_resource.resource);
-      dst->info.get_query_result_resource = src->info.get_query_result_resource;
-      dst->info.get_query_result_resource.query = NULL;
-      break;
-   }
-}
-
-static void
 dd_init_copy_of_draw_state(struct dd_draw_state_copy *state)
 {
    unsigned i,j;
@@ -935,137 +780,225 @@ dd_copy_draw_state(struct dd_draw_state *dst, struct dd_draw_state *src)
 }
 
 static void
-dd_free_record(struct dd_draw_record **record)
+dd_free_record(struct pipe_screen *screen, struct dd_draw_record *record)
+{
+   u_log_page_destroy(record->log_page);
+   dd_unreference_copy_of_call(&record->call);
+   dd_unreference_copy_of_draw_state(&record->draw_state);
+   screen->fence_reference(screen, &record->prev_bottom_of_pipe, NULL);
+   screen->fence_reference(screen, &record->top_of_pipe, NULL);
+   screen->fence_reference(screen, &record->bottom_of_pipe, NULL);
+   util_queue_fence_destroy(&record->driver_finished);
+   FREE(record);
+}
+
+static void
+dd_write_record(FILE *f, struct dd_draw_record *record)
 {
-   struct dd_draw_record *next = (*record)->next;
+   dd_dump_call(f, &record->draw_state.base, &record->call);
 
-   u_log_page_destroy((*record)->log_page);
-   dd_unreference_copy_of_call(&(*record)->call);
-   dd_unreference_copy_of_draw_state(&(*record)->draw_state);
-   FREE(*record);
-   *record = next;
+   if (record->log_page) {
+      fprintf(f,"\n\n**************************************************"
+                "***************************\n");
+      fprintf(f, "Context Log:\n\n");
+      u_log_page_print(record->log_page, f);
+   }
 }
 
 static void
-dd_dump_record(struct dd_context *dctx, struct dd_draw_record *record,
-               uint32_t hw_sequence_no, int64_t now)
+dd_maybe_dump_record(struct dd_screen *dscreen, struct dd_draw_record *record)
 {
-   FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
-                                record->draw_state.base.apitrace_call_number);
-   if (!f)
+   if (dscreen->dump_mode == DD_DUMP_ONLY_HANGS ||
+       (dscreen->dump_mode == DD_DUMP_APITRACE_CALL &&
+        dscreen->apitrace_dump_call != record->draw_state.base.apitrace_call_number))
       return;
 
-   fprintf(f, "Draw call sequence # = %u\n", record->sequence_no);
-   fprintf(f, "HW reached sequence # = %u\n", hw_sequence_no);
-   fprintf(f, "Elapsed time = %"PRIi64" ms\n\n",
-           (now - record->timestamp) / 1000);
-
-   dd_dump_call(f, &record->draw_state.base, &record->call);
+   char name[512];
+   dd_get_debug_filename_and_mkdir(name, sizeof(name), dscreen->verbose);
+   FILE *f = fopen(name, "w");
+   if (!f) {
+      fprintf(stderr, "dd: failed to open %s\n", name);
+      return;
+   }
 
-   fprintf(f,"\n\n**************************************************"
-             "***************************\n");
-   fprintf(f, "Context Log:\n\n");
-   u_log_page_print(record->log_page, f);
+   dd_write_header(f, dscreen->screen, record->draw_state.base.apitrace_call_number);
+   dd_write_record(f, record);
 
-   dctx->pipe->dump_debug_state(dctx->pipe, f,
-                                PIPE_DUMP_DEVICE_STATUS_REGISTERS);
-   dd_dump_dmesg(f);
    fclose(f);
 }
 
-int
-dd_thread_pipelined_hang_detect(void *input)
+static const char *
+dd_fence_state(struct pipe_screen *screen, struct pipe_fence_handle *fence,
+               bool *not_reached)
+{
+   if (!fence)
+      return "---";
+
+   bool ok = screen->fence_finish(screen, NULL, fence, 0);
+
+   if (not_reached && !ok)
+      *not_reached = true;
+
+   return ok ? "YES" : "NO ";
+}
+
+static void
+dd_report_hang(struct dd_context *dctx)
 {
-   struct dd_context *dctx = (struct dd_context *)input;
    struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+   struct pipe_screen *screen = dscreen->screen;
+   bool encountered_hang = false;
+   bool stop_output = false;
+   unsigned num_later = 0;
 
-   mtx_lock(&dctx->mutex);
+   fprintf(stderr, "GPU hang detected, collecting information...\n\n");
 
-   while (!dctx->kill_thread) {
-      struct dd_draw_record **record = &dctx->records;
+   fprintf(stderr, "Draw #   driver  prev BOP  TOP  BOP  dump file\n"
+                   "-------------------------------------------------------------\n");
 
-      /* Loop over all records. */
-      while (*record) {
-         int64_t now;
+   list_for_each_entry(struct dd_draw_record, record, &dctx->records, list) {
+      if (!encountered_hang &&
+          screen->fence_finish(screen, NULL, record->bottom_of_pipe, 0)) {
+         dd_maybe_dump_record(dscreen, record);
+         continue;
+      }
 
-         /* If the fence has been signalled, release the record and all older
-          * records.
-          */
-         if (*dctx->mapped_fence >= (*record)->sequence_no) {
-            while (*record)
-               dd_free_record(record);
-            break;
-         }
+      if (stop_output) {
+         dd_maybe_dump_record(dscreen, record);
+         num_later++;
+         continue;
+      }
 
-         /* The fence hasn't been signalled. Check the timeout. */
-         now = os_time_get();
-         if (os_time_timeout((*record)->timestamp,
-                             (*record)->timestamp + dscreen->timeout_ms * 1000,
-                             now)) {
-            fprintf(stderr, "GPU hang detected.\n");
+      bool driver = util_queue_fence_is_signalled(&record->driver_finished);
+      bool top_not_reached = false;
+      const char *prev_bop = dd_fence_state(screen, record->prev_bottom_of_pipe, NULL);
+      const char *top = dd_fence_state(screen, record->top_of_pipe, &top_not_reached);
+      const char *bop = dd_fence_state(screen, record->bottom_of_pipe, NULL);
 
-            /* Get the oldest unsignalled draw call. */
-            while ((*record)->next &&
-                   *dctx->mapped_fence < (*record)->next->sequence_no)
-               record = &(*record)->next;
+      fprintf(stderr, "%-9u %s      %s     %s  %s  ",
+              record->draw_call, driver ? "YES" : "NO ", prev_bop, top, bop);
 
-            dd_dump_record(dctx, *record, *dctx->mapped_fence, now);
-            dd_kill_process();
+      char name[512];
+      dd_get_debug_filename_and_mkdir(name, sizeof(name), false);
+
+      FILE *f = fopen(name, "w");
+      if (!f) {
+         fprintf(stderr, "fopen failed\n");
+      } else {
+         fprintf(stderr, "%s\n", name);
+
+         dd_write_header(f, dscreen->screen, record->draw_state.base.apitrace_call_number);
+         dd_write_record(f, record);
+
+         if (!encountered_hang) {
+            dd_dump_driver_state(dctx, f, PIPE_DUMP_DEVICE_STATUS_REGISTERS);
+            dd_dump_dmesg(f);
          }
 
-         record = &(*record)->next;
+         fclose(f);
       }
 
-      /* Unlock and sleep before starting all over again. */
-      mtx_unlock(&dctx->mutex);
-      os_time_sleep(10000); /* 10 ms */
-      mtx_lock(&dctx->mutex);
+      if (top_not_reached)
+         stop_output = true;
+      encountered_hang = true;
    }
 
-   /* Thread termination. */
-   while (dctx->records)
-      dd_free_record(&dctx->records);
+   if (num_later || dctx->record_pending) {
+      fprintf(stderr, "... and %u%s additional draws.\n", num_later,
+              dctx->record_pending ? "+1 (pending)" : "");
+   }
+
+   fprintf(stderr, "\nDone.\n");
+   dd_kill_process();
+}
 
+int
+dd_thread_main(void *input)
+{
+   struct dd_context *dctx = (struct dd_context *)input;
+   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+   struct pipe_screen *screen = dscreen->screen;
+
+   mtx_lock(&dctx->mutex);
+
+   for (;;) {
+      struct list_head records;
+      struct pipe_fence_handle *fence;
+      struct pipe_fence_handle *fence2 = NULL;
+
+      list_replace(&dctx->records, &records);
+      list_inithead(&dctx->records);
+      dctx->num_records = 0;
+
+      if (dctx->api_stalled)
+         cnd_signal(&dctx->cond);
+
+      if (!list_empty(&records)) {
+         /* Wait for the youngest draw. This means hangs can take a bit longer
+          * to detect, but it's more efficient this way. */
+         struct dd_draw_record *youngest =
+            LIST_ENTRY(struct dd_draw_record, records.prev, list);
+         fence = youngest->bottom_of_pipe;
+      } else if (dctx->record_pending) {
+         /* Wait for pending fences, in case the driver ends up hanging internally. */
+         fence = dctx->record_pending->prev_bottom_of_pipe;
+         fence2 = dctx->record_pending->top_of_pipe;
+      } else if (dctx->kill_thread) {
+         break;
+      } else {
+         cnd_wait(&dctx->cond, &dctx->mutex);
+         continue;
+      }
+      mtx_unlock(&dctx->mutex);
+
+      /* Fences can be NULL legitimately when timeout detection is disabled. */
+      if ((fence &&
+           !screen->fence_finish(screen, NULL, fence,
+                                 dscreen->timeout_ms * 1000*1000)) ||
+          (fence2 &&
+           !screen->fence_finish(screen, NULL, fence2,
+                                 dscreen->timeout_ms * 1000*1000))) {
+         mtx_lock(&dctx->mutex);
+         list_splice(&records, &dctx->records);
+         dd_report_hang(dctx);
+         /* we won't actually get here */
+         mtx_unlock(&dctx->mutex);
+      }
+
+      list_for_each_entry_safe(struct dd_draw_record, record, &records, list) {
+         dd_maybe_dump_record(dscreen, record);
+         list_del(&record->list);
+         dd_free_record(screen, record);
+      }
+
+      mtx_lock(&dctx->mutex);
+   }
    mtx_unlock(&dctx->mutex);
    return 0;
 }
 
-static void
-dd_pipelined_process_draw(struct dd_context *dctx, struct dd_call *call)
+static struct dd_draw_record *
+dd_create_record(struct dd_context *dctx)
 {
-   struct pipe_context *pipe = dctx->pipe;
    struct dd_draw_record *record;
 
-   /* Make a record of the draw call. */
    record = MALLOC_STRUCT(dd_draw_record);
    if (!record)
-      return;
+      return NULL;
 
-   /* Update the fence with the GPU.
-    *
-    * radeonsi/clear_buffer waits in the command processor until shaders are
-    * idle before writing to memory. That's a necessary condition for isolating
-    * draw calls.
-    */
-   dctx->sequence_no++;
-   pipe->clear_buffer(pipe, dctx->fence, 0, 4, &dctx->sequence_no, 4);
+   record->dctx = dctx;
+   record->draw_call = dctx->num_draw_calls;
 
-   /* Initialize the record. */
-   record->timestamp = os_time_get();
-   record->sequence_no = dctx->sequence_no;
-   record->log_page = u_log_new_page(&dctx->log);
-
-   memset(&record->call, 0, sizeof(record->call));
-   dd_copy_call(&record->call, call);
+   record->prev_bottom_of_pipe = NULL;
+   record->top_of_pipe = NULL;
+   record->bottom_of_pipe = NULL;
+   record->log_page = NULL;
+   util_queue_fence_init(&record->driver_finished);
 
    dd_init_copy_of_draw_state(&record->draw_state);
    dd_copy_draw_state(&record->draw_state.base, &dctx->draw_state);
 
-   /* Add the record to the list. */
-   mtx_lock(&dctx->mutex);
-   record->next = dctx->records;
-   dctx->records = record;
-   mtx_unlock(&dctx->mutex);
+   return record;
 }
 
 static void
@@ -1075,78 +1008,96 @@ dd_context_flush(struct pipe_context *_pipe,
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
 
-   switch (dd_screen(dctx->base.screen)->mode) {
-   case DD_DETECT_HANGS:
-      dd_flush_and_handle_hang(dctx, fence, flags,
-                               "GPU hang detected in pipe->flush()");
-      break;
-   case DD_DETECT_HANGS_PIPELINED: /* nothing to do here */
-   case DD_DUMP_ALL_CALLS:
-   case DD_DUMP_APITRACE_CALL:
-      pipe->flush(pipe, fence, flags);
-      break;
-   default:
-      assert(0);
+   pipe->flush(pipe, fence, flags);
+}
+
+static void
+dd_before_draw(struct dd_context *dctx, struct dd_draw_record *record)
+{
+   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+   struct pipe_context *pipe = dctx->pipe;
+   struct pipe_screen *screen = dscreen->screen;
+
+   if (dscreen->timeout_ms > 0) {
+      if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count) {
+         pipe->flush(pipe, &record->prev_bottom_of_pipe, 0);
+         screen->fence_reference(screen, &record->top_of_pipe, record->prev_bottom_of_pipe);
+      } else {
+         pipe->flush(pipe, &record->prev_bottom_of_pipe,
+                     PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE);
+         pipe->flush(pipe, &record->top_of_pipe,
+                     PIPE_FLUSH_DEFERRED | PIPE_FLUSH_TOP_OF_PIPE);
+      }
+
+      mtx_lock(&dctx->mutex);
+      dctx->record_pending = record;
+      if (list_empty(&dctx->records))
+         cnd_signal(&dctx->cond);
+      mtx_unlock(&dctx->mutex);
    }
 }
 
 static void
-dd_before_draw(struct dd_context *dctx)
+dd_after_draw_async(void *data)
 {
+   struct dd_draw_record *record = (struct dd_draw_record *)data;
+   struct dd_context *dctx = record->dctx;
    struct dd_screen *dscreen = dd_screen(dctx->base.screen);
 
-   if (dscreen->mode == DD_DETECT_HANGS &&
-       !dscreen->no_flush &&
-       dctx->num_draw_calls >= dscreen->skip_count)
-      dd_flush_and_handle_hang(dctx, NULL, 0,
-                               "GPU hang most likely caused by internal "
-                               "driver commands");
+   record->log_page = u_log_new_page(&dctx->log);
+
+   if (!util_queue_fence_is_signalled(&record->driver_finished))
+      util_queue_fence_signal(&record->driver_finished);
+
+   if (dscreen->dump_mode == DD_DUMP_APITRACE_CALL &&
+       dscreen->apitrace_dump_call > dctx->draw_state.apitrace_call_number) {
+      dd_thread_join(dctx);
+      /* No need to continue. */
+      exit(0);
+   }
 }
 
 static void
-dd_after_draw(struct dd_context *dctx, struct dd_call *call)
+dd_after_draw(struct dd_context *dctx, struct dd_draw_record *record)
 {
    struct dd_screen *dscreen = dd_screen(dctx->base.screen);
    struct pipe_context *pipe = dctx->pipe;
 
-   if (dctx->num_draw_calls >= dscreen->skip_count) {
-      switch (dscreen->mode) {
-      case DD_DETECT_HANGS:
-         if (!dscreen->no_flush &&
-            dd_flush_and_check_hang(dctx, NULL, 0)) {
-            dd_write_report(dctx, call,
-                         PIPE_DUMP_DEVICE_STATUS_REGISTERS,
-                         true);
-
-            /* Terminate the process to prevent future hangs. */
-            dd_kill_process();
-         } else {
-            u_log_page_destroy(u_log_new_page(&dctx->log));
-         }
-         break;
-      case DD_DETECT_HANGS_PIPELINED:
-         dd_pipelined_process_draw(dctx, call);
-         break;
-      case DD_DUMP_ALL_CALLS:
-         if (!dscreen->no_flush)
-            pipe->flush(pipe, NULL, 0);
-         dd_write_report(dctx, call, 0, false);
-         break;
-      case DD_DUMP_APITRACE_CALL:
-         if (dscreen->apitrace_dump_call ==
-             dctx->draw_state.apitrace_call_number) {
-            dd_write_report(dctx, call, 0, false);
-            /* No need to continue. */
-            exit(0);
-         } else {
-            u_log_page_destroy(u_log_new_page(&dctx->log));
-         }
-         break;
-      default:
-         assert(0);
-      }
+   if (dscreen->timeout_ms > 0) {
+      unsigned flush_flags;
+      if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count)
+         flush_flags = 0;
+      else
+         flush_flags = PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE;
+      pipe->flush(pipe, &record->bottom_of_pipe, flush_flags);
+
+      assert(record == dctx->record_pending);
    }
 
+   if (pipe->callback) {
+      util_queue_fence_reset(&record->driver_finished);
+      pipe->callback(pipe, dd_after_draw_async, record, true);
+   } else {
+      dd_after_draw_async(record);
+   }
+
+   mtx_lock(&dctx->mutex);
+   if (unlikely(dctx->num_records > 10000)) {
+      dctx->api_stalled = true;
+      /* Since this is only a heuristic to prevent the API thread from getting
+       * too far ahead, we don't need a loop here. */
+      cnd_wait(&dctx->cond, &dctx->mutex);
+      dctx->api_stalled = false;
+   }
+
+   if (list_empty(&dctx->records))
+      cnd_signal(&dctx->cond);
+
+   list_addtail(&record->list, &dctx->records);
+   dctx->record_pending = NULL;
+   dctx->num_records++;
+   mtx_unlock(&dctx->mutex);
+
    ++dctx->num_draw_calls;
    if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0)
       fprintf(stderr, "Gallium debugger reached %u draw calls.\n",
@@ -1159,20 +1110,36 @@ dd_context_draw_vbo(struct pipe_context *_pipe,
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx);
+
+   record->call.type = CALL_DRAW_VBO;
+   record->call.info.draw_vbo.draw = *info;
+   record->call.info.draw_vbo.draw.count_from_stream_output = NULL;
+   pipe_so_target_reference(&record->call.info.draw_vbo.draw.count_from_stream_output,
+                            info->count_from_stream_output);
+   if (info->index_size && !info->has_user_indices) {
+      record->call.info.draw_vbo.draw.index.resource = NULL;
+      pipe_resource_reference(&record->call.info.draw_vbo.draw.index.resource,
+                              info->index.resource);
+   }
 
-   call.type = CALL_DRAW_VBO;
-   call.info.draw_vbo.draw = *info;
    if (info->indirect) {
-      call.info.draw_vbo.indirect = *info->indirect;
-      call.info.draw_vbo.draw.indirect = &call.info.draw_vbo.indirect;
+      record->call.info.draw_vbo.indirect = *info->indirect;
+      record->call.info.draw_vbo.draw.indirect = &record->call.info.draw_vbo.indirect;
+
+      record->call.info.draw_vbo.indirect.buffer = NULL;
+      pipe_resource_reference(&record->call.info.draw_vbo.indirect.buffer,
+                              info->indirect->buffer);
+      record->call.info.draw_vbo.indirect.indirect_draw_count = NULL;
+      pipe_resource_reference(&record->call.info.draw_vbo.indirect.indirect_draw_count,
+                              info->indirect->indirect_draw_count);
    } else {
-      memset(&call.info.draw_vbo.indirect, 0, sizeof(*info->indirect));
+      memset(&record->call.info.draw_vbo.indirect, 0, sizeof(*info->indirect));
    }
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->draw_vbo(pipe, info);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
@@ -1181,14 +1148,16 @@ dd_context_launch_grid(struct pipe_context *_pipe,
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx);
 
-   call.type = CALL_LAUNCH_GRID;
-   call.info.launch_grid = *info;
+   record->call.type = CALL_LAUNCH_GRID;
+   record->call.info.launch_grid = *info;
+   record->call.info.launch_grid.indirect = NULL;
+   pipe_resource_reference(&record->call.info.launch_grid.indirect, info->indirect);
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->launch_grid(pipe, info);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
@@ -1200,23 +1169,25 @@ dd_context_resource_copy_region(struct pipe_context *_pipe,
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
-
-   call.type = CALL_RESOURCE_COPY_REGION;
-   call.info.resource_copy_region.dst = dst;
-   call.info.resource_copy_region.dst_level = dst_level;
-   call.info.resource_copy_region.dstx = dstx;
-   call.info.resource_copy_region.dsty = dsty;
-   call.info.resource_copy_region.dstz = dstz;
-   call.info.resource_copy_region.src = src;
-   call.info.resource_copy_region.src_level = src_level;
-   call.info.resource_copy_region.src_box = *src_box;
-
-   dd_before_draw(dctx);
+   struct dd_draw_record *record = dd_create_record(dctx);
+
+   record->call.type = CALL_RESOURCE_COPY_REGION;
+   record->call.info.resource_copy_region.dst = NULL;
+   pipe_resource_reference(&record->call.info.resource_copy_region.dst, dst);
+   record->call.info.resource_copy_region.dst_level = dst_level;
+   record->call.info.resource_copy_region.dstx = dstx;
+   record->call.info.resource_copy_region.dsty = dsty;
+   record->call.info.resource_copy_region.dstz = dstz;
+   record->call.info.resource_copy_region.src = NULL;
+   pipe_resource_reference(&record->call.info.resource_copy_region.src, src);
+   record->call.info.resource_copy_region.src_level = src_level;
+   record->call.info.resource_copy_region.src_box = *src_box;
+
+   dd_before_draw(dctx, record);
    pipe->resource_copy_region(pipe,
                               dst, dst_level, dstx, dsty, dstz,
                               src, src_level, src_box);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
@@ -1224,14 +1195,18 @@ dd_context_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx);
 
-   call.type = CALL_BLIT;
-   call.info.blit = *info;
+   record->call.type = CALL_BLIT;
+   record->call.info.blit = *info;
+   record->call.info.blit.dst.resource = NULL;
+   pipe_resource_reference(&record->call.info.blit.dst.resource, info->dst.resource);
+   record->call.info.blit.src.resource = NULL;
+   pipe_resource_reference(&record->call.info.blit.src.resource, info->src.resource);
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->blit(pipe, info);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static boolean
@@ -1245,21 +1220,22 @@ dd_context_generate_mipmap(struct pipe_context *_pipe,
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx);
    boolean result;
 
-   call.type = CALL_GENERATE_MIPMAP;
-   call.info.generate_mipmap.res = res;
-   call.info.generate_mipmap.format = format;
-   call.info.generate_mipmap.base_level = base_level;
-   call.info.generate_mipmap.last_level = last_level;
-   call.info.generate_mipmap.first_layer = first_layer;
-   call.info.generate_mipmap.last_layer = last_layer;
+   record->call.type = CALL_GENERATE_MIPMAP;
+   record->call.info.generate_mipmap.res = NULL;
+   pipe_resource_reference(&record->call.info.generate_mipmap.res, res);
+   record->call.info.generate_mipmap.format = format;
+   record->call.info.generate_mipmap.base_level = base_level;
+   record->call.info.generate_mipmap.last_level = last_level;
+   record->call.info.generate_mipmap.first_layer = first_layer;
+   record->call.info.generate_mipmap.last_layer = last_layer;
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    result = pipe->generate_mipmap(pipe, res, format, base_level, last_level,
                                   first_layer, last_layer);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
    return result;
 }
 
@@ -1275,25 +1251,25 @@ dd_context_get_query_result_resource(struct pipe_context *_pipe,
    struct dd_context *dctx = dd_context(_pipe);
    struct dd_query *dquery = dd_query(query);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
-
-   call.type = CALL_GET_QUERY_RESULT_RESOURCE;
-   call.info.get_query_result_resource.query = query;
-   call.info.get_query_result_resource.wait = wait;
-   call.info.get_query_result_resource.result_type = result_type;
-   call.info.get_query_result_resource.index = index;
-   call.info.get_query_result_resource.resource = resource;
-   call.info.get_query_result_resource.offset = offset;
-
-   /* In pipelined mode, the query may be deleted by the time we need to
-    * print it.
-    */
-   call.info.get_query_result_resource.query_type = dquery->type;
-
-   dd_before_draw(dctx);
+   struct dd_draw_record *record = dd_create_record(dctx);
+
+   record->call.type = CALL_GET_QUERY_RESULT_RESOURCE;
+   record->call.info.get_query_result_resource.query = query;
+   record->call.info.get_query_result_resource.wait = wait;
+   record->call.info.get_query_result_resource.result_type = result_type;
+   record->call.info.get_query_result_resource.index = index;
+   record->call.info.get_query_result_resource.resource = NULL;
+   pipe_resource_reference(&record->call.info.get_query_result_resource.resource,
+                           resource);
+   record->call.info.get_query_result_resource.offset = offset;
+
+   /* The query may be deleted by the time we need to print it. */
+   record->call.info.get_query_result_resource.query_type = dquery->type;
+
+   dd_before_draw(dctx, record);
    pipe->get_query_result_resource(pipe, dquery->query, wait,
                                    result_type, index, resource, offset);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
@@ -1302,14 +1278,15 @@ dd_context_flush_resource(struct pipe_context *_pipe,
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx);
 
-   call.type = CALL_FLUSH_RESOURCE;
-   call.info.flush_resource = resource;
+   record->call.type = CALL_FLUSH_RESOURCE;
+   record->call.info.flush_resource = NULL;
+   pipe_resource_reference(&record->call.info.flush_resource, resource);
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->flush_resource(pipe, resource);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
@@ -1319,17 +1296,17 @@ dd_context_clear(struct pipe_context *_pipe, unsigned buffers,
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx);
 
-   call.type = CALL_CLEAR;
-   call.info.clear.buffers = buffers;
-   call.info.clear.color = *color;
-   call.info.clear.depth = depth;
-   call.info.clear.stencil = stencil;
+   record->call.type = CALL_CLEAR;
+   record->call.info.clear.buffers = buffers;
+   record->call.info.clear.color = *color;
+   record->call.info.clear.depth = depth;
+   record->call.info.clear.stencil = stencil;
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->clear(pipe, buffers, color, depth, stencil);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
@@ -1342,14 +1319,14 @@ dd_context_clear_render_target(struct pipe_context *_pipe,
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx);
 
-   call.type = CALL_CLEAR_RENDER_TARGET;
+   record->call.type = CALL_CLEAR_RENDER_TARGET;
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->clear_render_target(pipe, dst, color, dstx, dsty, width, height,
                              render_condition_enabled);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
@@ -1361,15 +1338,15 @@ dd_context_clear_depth_stencil(struct pipe_context *_pipe,
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx);
 
-   call.type = CALL_CLEAR_DEPTH_STENCIL;
+   record->call.type = CALL_CLEAR_DEPTH_STENCIL;
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->clear_depth_stencil(pipe, dst, clear_flags, depth, stencil,
                              dstx, dsty, width, height,
                              render_condition_enabled);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
@@ -1379,18 +1356,19 @@ dd_context_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx);
 
-   call.type = CALL_CLEAR_BUFFER;
-   call.info.clear_buffer.res = res;
-   call.info.clear_buffer.offset = offset;
-   call.info.clear_buffer.size = size;
-   call.info.clear_buffer.clear_value = clear_value;
-   call.info.clear_buffer.clear_value_size = clear_value_size;
+   record->call.type = CALL_CLEAR_BUFFER;
+   record->call.info.clear_buffer.res = NULL;
+   pipe_resource_reference(&record->call.info.clear_buffer.res, res);
+   record->call.info.clear_buffer.offset = offset;
+   record->call.info.clear_buffer.size = size;
+   record->call.info.clear_buffer.clear_value = clear_value;
+   record->call.info.clear_buffer.clear_value_size = clear_value_size;
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->clear_buffer(pipe, res, offset, size, clear_value, clear_value_size);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
@@ -1402,13 +1380,13 @@ dd_context_clear_texture(struct pipe_context *_pipe,
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx);
 
-   call.type = CALL_CLEAR_TEXTURE;
+   record->call.type = CALL_CLEAR_TEXTURE;
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->clear_texture(pipe, res, level, box, data);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 void
index 252dbff..d1965be 100644 (file)
 #include "pipe/p_screen.h"
 #include "dd_util.h"
 #include "os/os_thread.h"
+#include "util/list.h"
 #include "util/u_log.h"
+#include "util/u_queue.h"
 
-enum dd_mode {
-   DD_DETECT_HANGS,
-   DD_DETECT_HANGS_PIPELINED,
+struct dd_context;
+
+enum dd_dump_mode {
+   DD_DUMP_ONLY_HANGS,
    DD_DUMP_ALL_CALLS,
    DD_DUMP_APITRACE_CALL,
 };
@@ -47,8 +50,8 @@ struct dd_screen
    struct pipe_screen base;
    struct pipe_screen *screen;
    unsigned timeout_ms;
-   enum dd_mode mode;
-   bool no_flush;
+   enum dd_dump_mode dump_mode;
+   bool flush_always;
    bool verbose;
    unsigned skip_count;
    unsigned apitrace_dump_call;
@@ -218,13 +221,19 @@ struct dd_draw_state_copy
 };
 
 struct dd_draw_record {
-   struct dd_draw_record *next;
+   struct list_head list;
+   struct dd_context *dctx;
+
+   unsigned draw_call;
 
-   int64_t timestamp;
-   uint32_t sequence_no;
+   struct pipe_fence_handle *prev_bottom_of_pipe;
+   struct pipe_fence_handle *top_of_pipe;
+   struct pipe_fence_handle *bottom_of_pipe;
 
    struct dd_call call;
    struct dd_draw_state_copy draw_state;
+
+   struct util_queue_fence driver_finished;
    struct u_log_page *log_page;
 };
 
@@ -252,17 +261,16 @@ struct dd_context
     *
     * An independent, separate thread loops over the list of records and checks
     * their fences. Records with signalled fences are freed. On fence timeout,
-    * the thread dumps the record of the oldest unsignalled fence.
+    * the thread dumps the records of in-flight draws.
     */
    thrd_t thread;
    mtx_t mutex;
-   int kill_thread;
-   struct pipe_resource *fence;
-   struct pipe_transfer *fence_transfer;
-   uint32_t *mapped_fence;
-   uint32_t sequence_no;
-   struct dd_draw_record *records;
-   int max_log_buffer_size;
+   cnd_t cond;
+   struct dd_draw_record *record_pending; /* currently inside the driver */
+   struct list_head records; /* oldest record first */
+   unsigned num_records;
+   bool kill_thread;
+   bool api_stalled;
 };
 
 
@@ -271,8 +279,11 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);
 
 void
 dd_init_draw_functions(struct dd_context *dctx);
+
+void
+dd_thread_join(struct dd_context *dctx);
 int
-dd_thread_pipelined_hang_detect(void *input);
+dd_thread_main(void *input);
 
 FILE *
 dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number);
index caf31f6..11d1d8c 100644 (file)
@@ -28,6 +28,7 @@
 #include "dd_pipe.h"
 #include "dd_public.h"
 #include "util/u_memory.h"
+#include <ctype.h>
 #include <stdio.h>
 
 
@@ -381,15 +382,55 @@ dd_screen_destroy(struct pipe_screen *_screen)
    FREE(dscreen);
 }
 
+/**
+ * Advance the cursor past any leading whitespace.
+ *
+ * \param p  in/out cursor into a NUL-terminated string; on return it points
+ *           at the first non-whitespace character or at the terminating NUL.
+ */
+static void
+skip_space(const char **p)
+{
+   /* The <ctype.h> classifiers are only defined for unsigned char values
+    * (and EOF); passing a negative plain char is undefined behavior.
+    */
+   while (isspace((unsigned char)**p))
+      (*p)++;
+}
+
+/**
+ * Try to consume the keyword \p word at the cursor.
+ *
+ * The keyword only matches when it is followed by whitespace or by the end
+ * of the string, so "flushx" does not match "flush".
+ *
+ * \param cur   in/out cursor; on a match it is advanced past the keyword and
+ *              past the single whitespace character terminating it (if any).
+ *              It is left untouched when there is no match.
+ * \param word  NUL-terminated keyword to look for.
+ * \return true if the keyword was matched and consumed.
+ */
+static bool
+match_word(const char **cur, const char *word)
+{
+   size_t len = strlen(word);
+   if (strncmp(*cur, word, len) != 0)
+      return false;
+
+   const char *p = *cur + len;
+   if (*p) {
+      /* Cast: <ctype.h> classifiers are undefined for negative char values. */
+      if (!isspace((unsigned char)*p))
+         return false;
+
+      *cur = p + 1;
+   } else {
+      *cur = p;
+   }
+
+   return true;
+}
+
+/**
+ * Try to parse an unsigned integer at the cursor.
+ *
+ * The number is parsed by strtoul() with base 0, so decimal, octal ("0...")
+ * and hexadecimal ("0x...") notations are accepted. It must be followed by
+ * whitespace or by the end of the string.
+ *
+ * \param cur    in/out cursor; advanced just past the number on success and
+ *               left untouched on failure.
+ * \param value  receives the parsed number on success; untouched on failure.
+ * \return true if a number that fits in an unsigned int was parsed.
+ */
+static bool
+match_uint(const char **cur, unsigned *value)
+{
+   char *end;
+   unsigned long parsed = strtoul(*cur, &end, 0);
+   if (end == *cur || (*end && !isspace((unsigned char)*end)))
+      return false;
+
+   /* Reject values that would be silently truncated when narrowed, e.g.
+    * 4294967296 turning into a timeout of 0 where long is 64 bits wide.
+    */
+   unsigned v = (unsigned)parsed;
+   if (v != parsed)
+      return false;
+
+   *cur = end;
+   *value = v;
+   return true;
+}
+
 struct pipe_screen *
 ddebug_screen_create(struct pipe_screen *screen)
 {
    struct dd_screen *dscreen;
    const char *option;
-   bool no_flush;
-   unsigned timeout = 0;
+   bool flush = false;
+   bool verbose = false;
+   unsigned timeout = 1000;
    unsigned apitrace_dump_call = 0;
-   enum dd_mode mode;
+   enum dd_dump_mode mode = DD_DUMP_ONLY_HANGS;
 
    option = debug_get_option("GALLIUM_DDEBUG", NULL);
    if (!option)
@@ -400,53 +441,70 @@ ddebug_screen_create(struct pipe_screen *screen)
       puts("");
       puts("Usage:");
       puts("");
-      puts("  GALLIUM_DDEBUG=\"always [noflush] [verbose]\"");
-      puts("    Flush and dump context and driver information after every draw call into");
-      puts("    $HOME/"DD_DIR"/.");
+      puts("  GALLIUM_DDEBUG=\"[<timeout in ms>] [(always|apitrace <call#>)] [flush] [verbose]\"");
+      puts("  GALLIUM_DDEBUG_SKIP=[count]");
       puts("");
-      puts("  GALLIUM_DDEBUG=\"[timeout in ms] [noflush] [verbose]\"");
-      puts("    Flush and detect a device hang after every draw call based on the given");
-      puts("    fence timeout and dump context and driver information into");
-      puts("    $HOME/"DD_DIR"/ when a hang is detected.");
+      puts("Dump context and driver information of draw calls into");
+      puts("$HOME/"DD_DIR"/. By default, watch for GPU hangs and only dump information");
+      puts("about draw calls related to the hang.");
       puts("");
-      puts("  GALLIUM_DDEBUG=\"pipelined [timeout in ms] [verbose]\"");
-      puts("    Detect a device hang after every draw call based on the given fence");
-      puts("    timeout without flushes and dump context and driver information into");
-      puts("    $HOME/"DD_DIR"/ when a hang is detected.");
+      puts("<timeout in ms>");
+      puts("  Change the default timeout for GPU hang detection (default=1000ms).");
+      puts("  Setting this to 0 will disable GPU hang detection entirely.");
       puts("");
-      puts("  GALLIUM_DDEBUG=\"apitrace [call#] [verbose]\"");
-      puts("    Dump apitrace draw call information into $HOME/"DD_DIR"/. Implies 'noflush'.");
+      puts("always");
+      puts("  Dump information about all draw calls.");
       puts("");
-      puts("  If 'noflush' is specified, do not flush on every draw call. In hang");
-      puts("  detection mode, this only detect hangs in pipe->flush.");
-      puts("  If 'verbose' is specified, additional information is written to stderr.");
+      puts("apitrace <call#>");
+      puts("  Dump information about the draw call corresponding to the given");
+      puts("  apitrace call number and exit.");
       puts("");
-      puts("  GALLIUM_DDEBUG_SKIP=[count]");
-      puts("    Skip flush and hang detection for the given initial number of draw calls.");
+      puts("flush");
+      puts("  Flush after every draw call.");
+      puts("");
+      puts("verbose");
+      puts("  Write additional information to stderr.");
+      puts("");
+      puts("GALLIUM_DDEBUG_SKIP=count");
+      puts("  Skip dumping on the first count draw calls (only relevant with 'always').");
       puts("");
       exit(0);
    }
 
-   no_flush = strstr(option, "noflush") != NULL;
-
-   if (!strncmp(option, "always", 6)) {
-      mode = DD_DUMP_ALL_CALLS;
-   } else if (!strncmp(option, "apitrace", 8)) {
-      mode = DD_DUMP_APITRACE_CALL;
-      no_flush = true;
-
-      if (sscanf(option+8, "%u", &apitrace_dump_call) != 1)
-         return screen;
-   } else if (!strncmp(option, "pipelined", 9)) {
-      mode = DD_DETECT_HANGS_PIPELINED;
-
-      if (sscanf(option+10, "%u", &timeout) != 1)
-         return screen;
-   } else {
-      mode = DD_DETECT_HANGS;
-
-      if (sscanf(option, "%u", &timeout) != 1)
-         return screen;
+   for (;;) {
+      skip_space(&option);
+      if (!*option)
+         break;
+
+      if (match_word(&option, "always")) {
+         if (mode == DD_DUMP_APITRACE_CALL) {
+            printf("ddebug: both 'always' and 'apitrace' specified\n");
+            exit(1);
+         }
+
+         mode = DD_DUMP_ALL_CALLS;
+      } else if (match_word(&option, "flush")) {
+         flush = true;
+      } else if (match_word(&option, "verbose")) {
+         verbose = true;
+      } else if (match_word(&option, "apitrace")) {
+         if (mode != DD_DUMP_ONLY_HANGS) {
+            printf("ddebug: 'apitrace' can only appear once and not mixed with 'always'\n");
+            exit(1);
+         }
+
+         if (!match_uint(&option, &apitrace_dump_call)) {
+            printf("ddebug: expected call number after 'apitrace'\n");
+            exit(1);
+         }
+
+         mode = DD_DUMP_APITRACE_CALL;
+      } else if (match_uint(&option, &timeout)) {
+         /* no-op */
+      } else {
+         printf("ddebug: bad options: %s\n", option);
+         exit(1);
+      }
    }
 
    dscreen = CALLOC_STRUCT(dd_screen);
@@ -496,27 +554,28 @@ ddebug_screen_create(struct pipe_screen *screen)
 
    dscreen->screen = screen;
    dscreen->timeout_ms = timeout;
-   dscreen->mode = mode;
-   dscreen->no_flush = no_flush;
-   dscreen->verbose = strstr(option, "verbose") != NULL;
+   dscreen->dump_mode = mode;
+   dscreen->flush_always = flush;
+   dscreen->verbose = verbose;
    dscreen->apitrace_dump_call = apitrace_dump_call;
 
-   switch (dscreen->mode) {
+   switch (dscreen->dump_mode) {
    case DD_DUMP_ALL_CALLS:
       fprintf(stderr, "Gallium debugger active. Logging all calls.\n");
       break;
-   case DD_DETECT_HANGS:
-   case DD_DETECT_HANGS_PIPELINED:
-      fprintf(stderr, "Gallium debugger active. "
-              "The hang detection timeout is %i ms.\n", timeout);
-      break;
    case DD_DUMP_APITRACE_CALL:
       fprintf(stderr, "Gallium debugger active. Going to dump an apitrace call.\n");
       break;
    default:
-      assert(0);
+      fprintf(stderr, "Gallium debugger active.\n");
+      break;
    }
 
+   if (dscreen->timeout_ms > 0)
+      fprintf(stderr, "Hang detection timeout is %ums.\n", dscreen->timeout_ms);
+   else
+      fprintf(stderr, "Hang detection is disabled.\n");
+
    dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0);
    if (dscreen->skip_count > 0) {
       fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n",