OSDN Git Service

i965/gen6+: Add support for GL_ARB_timer_query.
author Eric Anholt <eric@anholt.net>
Wed, 18 Jul 2012 17:18:26 +0000 (10:18 -0700)
committer Eric Anholt <eric@anholt.net>
Sun, 26 Aug 2012 17:40:33 +0000 (10:40 -0700)
Needs updated libdrm.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
configure.ac
docs/GL3.txt
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_queryobj.c
src/mesa/drivers/dri/intel/intel_extensions.c
src/mesa/drivers/dri/intel/intel_reg.h

index 97e53b3..c30bcf0 100644 (file)
@@ -29,7 +29,7 @@ LT_INIT([disable-static])
 dnl Versions for external dependencies
 LIBDRM_REQUIRED=2.4.24
 LIBDRM_RADEON_REQUIRED=2.4.38
-LIBDRM_INTEL_REQUIRED=2.4.37
+LIBDRM_INTEL_REQUIRED=2.4.38
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED=2.4.33
 DRI2PROTO_REQUIRED=2.6
index c1e2da8..1d55282 100644 (file)
@@ -80,7 +80,7 @@ GL_ARB_sampler_objects                                DONE (i965, r300, r600)
 GL_ARB_shader_bit_encoding                            DONE
 GL_ARB_texture_rgb10_a2ui                             DONE (i965, r600)
 GL_ARB_texture_swizzle                                DONE (same as EXT version) (i965, r300, r600, swrast)
-GL_ARB_timer_query                                    DONE
+GL_ARB_timer_query                                    DONE (i965)
 GL_ARB_instanced_arrays                               DONE (i965, r300, r600)
 GL_ARB_vertex_type_2_10_10_10_rev                     DONE (r600)
 
index e72d5b6..7f8197d 100644 (file)
@@ -256,6 +256,8 @@ brwCreateContext(int api,
    if (intel->gen >= 6)
        ctx->Const.QuadsFollowProvokingVertexConvention = false;
 
+   ctx->Const.QueryCounterBits.Timestamp = 36;
+
    if (intel->is_g4x || intel->gen >= 5) {
       brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
index 3f9e065..87c8dac 100644 (file)
@@ -41,6 +41,7 @@
 #include "main/imports.h"
 
 #include "brw_context.h"
+#include "brw_defines.h"
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
 #include "intel_reg.h"
@@ -155,6 +156,32 @@ brw_queryobj_get_results(struct gl_context *ctx,
         query->Base.Result += 1000 * ((results[1] >> 32) - (results[0] >> 32));
       break;
 
+   case GL_TIMESTAMP:
+      if (intel->gen >= 6) {
+         /* Our timer is a clock that increments every 80ns (regardless of
+          * other clock scaling in the system).  The timestamp register we can
+          * read for glGetTimestamp() masks out the top 32 bits, so we do that
+          * here too to let the two counters be compared against each other.
+          *
+          * If we just multiplied that 32 bits of data by 80, it would roll
+          * over at a non-power-of-two, so an application couldn't use
+          * GL_QUERY_COUNTER_BITS to handle rollover correctly.  Instead, we
+          * report 36 bits and truncate at that (rolling over 5 times as often
+          * as the HW counter), and when the 32-bit counter rolls over, it
+          * happens to also be at a rollover in the reported value from near
+          * (1<<36) to 0.
+          *
+          * The low 32 bits rolls over in ~343 seconds.  Our 36-bit result
+          * rolls over every ~69 seconds.
+          */
+        query->Base.Result = 80 * (results[1] & 0xffffffff);
+         query->Base.Result &= (1ull << 36) - 1;
+      } else {
+        query->Base.Result = 1000 * (results[1] >> 32);
+      }
+
+      break;
+
    case GL_SAMPLES_PASSED_ARB:
       /* Map and count the pixels from the current query BO */
       for (i = query->first_index; i <= query->last_index; i++) {
@@ -262,6 +289,12 @@ brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
    struct brw_query_object *query = (struct brw_query_object *)q;
 
    switch (query->Base.Target) {
+   case GL_TIMESTAMP:
+      drm_intel_bo_unreference(query->bo);
+      query->bo = drm_intel_bo_alloc(intel->bufmgr, "timer query",
+                                    4096, 4096);
+      /* FALLTHROUGH */
+
    case GL_TIME_ELAPSED_EXT:
       write_timestamp(intel, query->bo, 1);
       intel_batchbuffer_flush(intel);
@@ -404,6 +437,22 @@ brw_emit_query_end(struct brw_context *brw)
    brw->query.index++;
 }
 
+static uint64_t
+brw_get_timestamp(struct gl_context *ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   uint64_t result = 0;
+   /* Driver hook installed as functions->GetTimestamp.  Reads the TIMESTAMP
+    * register through libdrm; the return value of the read is not checked
+    * here.
+    */
+   drm_intel_reg_read(intel->bufmgr, TIMESTAMP, &result);
+
+   /* See logic in brw_queryobj_get_results(): keep 32 bits of the counter,
+    * scale by 80 (the comment there gives 80ns per tick) and truncate to
+    * 36 bits, so the value returned wraps at the same point as the
+    * GL_TIMESTAMP query result.
+    *
+    * NOTE(review): brw_queryobj_get_results() masks its sample with
+    * 0xffffffff (low 32 bits), while this code shifts right by 32 (high
+    * 32 bits) -- confirm which half of the value returned by
+    * drm_intel_reg_read() actually holds the counter.
+    */
+   result = result >> 32;
+   result *= 80;
+   result &= (1ull << 36) - 1;
+
+   return result;
+}
+
 void brw_init_queryobj_functions(struct dd_function_table *functions)
 {
    functions->NewQueryObject = brw_new_query_object;
@@ -412,4 +461,5 @@ void brw_init_queryobj_functions(struct dd_function_table *functions)
    functions->EndQuery = brw_end_query;
    functions->CheckQuery = brw_check_query;
    functions->WaitQuery = brw_wait_query;
+   functions->GetTimestamp = brw_get_timestamp;
 }
index df7abff..66134a4 100755 (executable)
@@ -31,6 +31,7 @@
 #include "intel_chipset.h"
 #include "intel_context.h"
 #include "intel_extensions.h"
+#include "intel_reg.h"
 #include "utils.h"
 
 /**
@@ -113,6 +114,13 @@ intelInitExtensions(struct gl_context *ctx)
    if (intel->gen >= 5)
       ctx->Extensions.EXT_timer_query = true;
 
+   if (intel->gen >= 6) {
+      uint64_t dummy;
+      /* Test if the kernel has the ioctl. */
+      if (drm_intel_reg_read(intel->bufmgr, TIMESTAMP, &dummy) == 0)
+         ctx->Extensions.ARB_timer_query = true;
+   }
+
    if (intel->gen >= 4) {
       ctx->Extensions.ARB_color_buffer_float = true;
       ctx->Extensions.ARB_depth_buffer_float = true;
index 2c75a8e..53b1cb9 100644 (file)
 #define SO_NUM_PRIMS_WRITTEN1_IVB      0x5208
 #define SO_NUM_PRIMS_WRITTEN2_IVB      0x5210
 #define SO_NUM_PRIMS_WRITTEN3_IVB      0x5218
+
+#define TIMESTAMP                       0x2358 /* passed as the offset argument to drm_intel_reg_read(); presumably the timestamp register's MMIO offset -- confirm against the hardware docs */