i965: perf: flush batchbuffers at the beginning of queries

author Lionel Landwerlin <lionel.g.landwerlin@intel.com>

Tue, 25 Jul 2017 16:49:22 +0000 (17:49 +0100)

committer Emil Velikov <emil.l.velikov@gmail.com>

Wed, 2 Aug 2017 23:19:06 +0000 (00:19 +0100)
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Tue, 25 Jul 2017 16:49:22 +0000 (17:49 +0100)
committer Emil Velikov <emil.l.velikov@gmail.com>
Wed, 2 Aug 2017 23:19:06 +0000 (00:19 +0100)
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c

index 95f112e..2f49efa 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1001,6 +1001,14 @@ brw_begin_perf_query(struct gl_context *ctx,
        obj->oa.begin_report_id = brw->perfquery.next_query_start_report_id;
        brw->perfquery.next_query_start_report_id += 2;
  
+      /* We flush the batchbuffer here to minimize the chances that the MI_RPC
+       * commands delimiting the query end up in different batchbuffers. If they
+       * do, the measurement will include the time it takes for the kernel
+       * scheduler to load a new request into the hardware. This shows up in
+       * tools like frameretrace as spikes in the "GPU Core Clocks" counter.
+       */
+      intel_batchbuffer_flush(brw);
+
        /* Take a starting OA counter snapshot. */
        emit_mi_report_perf_count(brw, obj->oa.bo, 0,
                                  obj->oa.begin_report_id);
author	Lionel Landwerlin <lionel.g.landwerlin@intel.com>
	Tue, 25 Jul 2017 16:49:22 +0000 (17:49 +0100)
committer	Emil Velikov <emil.l.velikov@gmail.com>
	Wed, 2 Aug 2017 23:19:06 +0000 (00:19 +0100)