From 6a9c151dbb87a10b6d51c451a5a277d646d08857 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 10 Nov 2015 01:27:15 +0100 Subject: [PATCH] nv50: add compute-related MP perf counters on G84+ These compute-related MP performance counters have been reverse engineered using CUPTI which is part of NVIDIA CUDA. As for nvc0, we use a compute kernel to read out those performance counters, and the command stream to configure them. Note that Tesla only exposes 4 MP performance counters, while Fermi has 8. Only G84+ is supported because G80 is an old and weird card. Tested on G84, G96, G200, MCP79 and GT218 with glxgears, glxspheres64, xonotic-glx, heaven and valley. Signed-off-by: Samuel Pitoiset Tested-by: Pierre Moreau Acked-by: Ilia Mirkin --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/nv50/nv50_context.h | 3 +- src/gallium/drivers/nouveau/nv50/nv50_query.c | 23 ++ src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 34 +- src/gallium/drivers/nouveau/nv50/nv50_query_hw.h | 16 + .../drivers/nouveau/nv50/nv50_query_hw_sm.c | 417 +++++++++++++++++++++ .../drivers/nouveau/nv50/nv50_query_hw_sm.h | 45 +++ src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.h | 9 + 9 files changed, 548 insertions(+), 2 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index c2ff8e9b46e..a1aa13587a1 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -77,6 +77,8 @@ NV50_C_SOURCES := \ nv50/nv50_query.h \ nv50/nv50_query_hw.c \ nv50/nv50_query_hw.h \ + nv50/nv50_query_hw_sm.c \ + nv50/nv50_query_hw_sm.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index fbafe029948..2cebcd99423 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -67,7 +67,8 @@ /* compute bufctx (during launch_grid) */ #define NV50_BIND_CP_GLOBAL 0 #define NV50_BIND_CP_SCREEN 1 -#define NV50_BIND_CP_COUNT 2 +#define NV50_BIND_CP_QUERY 2 +#define NV50_BIND_CP_COUNT 3 /* bufctx for other operations */ #define NV50_BIND_2D 0 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index dd9b85b7208..c31bf728c61 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -153,3 +153,26 @@ nv50_init_query_functions(struct nv50_context *nv50) pipe->get_query_result = nv50_get_query_result; pipe->render_condition = nv50_render_condition; } + +int +nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int num_hw_queries = 0; + + num_hw_queries = nv50_hw_get_driver_query_info(screen, 0, NULL); + + if (!info) + return num_hw_queries; + + /* Init default values. */ + info->name = "this_is_not_the_query_you_are_looking_for"; + info->query_type = 0xdeadd01d; + info->max_value.u64 = 0; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->group_id = -1; + + return nv50_hw_get_driver_query_info(screen, id, info); +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index 945ce7abe50..23108acbef5 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -25,6 +25,7 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query_hw.h" +#include "nv50/nv50_query_hw_sm.h" #include "nv_object.xml.h" #define NV50_HW_QUERY_STATE_READY 0 @@ -41,7 +42,7 @@ #define NV50_HW_QUERY_ALLOC_SPACE 256 -static bool +bool nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) { @@ -122,6 +123,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_hw_query *hq = nv50_hw_query(q); + if (hq->funcs && hq->funcs->begin_query) + return hq->funcs->begin_query(nv50, hq); + /* For occlusion queries we have to change the storage, because a previous * query might set the initial render condition to false even *after* we re- * initialized it to true. @@ -193,6 +197,11 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_hw_query *hq = nv50_hw_query(q); + if (hq->funcs && hq->funcs->end_query) { + hq->funcs->end_query(nv50, hq); + return; + } + hq->state = NV50_HW_QUERY_STATE_ENDED; switch (q->type) { @@ -261,6 +270,9 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, uint64_t *data64 = (uint64_t *)hq->data; int i; + if (hq->funcs && hq->funcs->get_query_result) + return hq->funcs->get_query_result(nv50, hq, wait, result); + if (hq->state != NV50_HW_QUERY_STATE_READY) nv50_hw_query_update(q); @@ -331,6 +343,12 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) struct nv50_hw_query *hq; struct nv50_query *q; + hq = nv50_hw_sm_create_query(nv50, type); + if (hq) { + hq->base.funcs = &hw_query_funcs; + return (struct nv50_query *)hq; + } + hq = CALLOC_STRUCT(nv50_hw_query); if (!hq) return NULL; @@ -375,6 +393,20 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) return q; } +int +nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int num_hw_sm_queries = 0; + + num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL); + + if (!info) + return num_hw_sm_queries; + + return nv50_hw_sm_get_driver_query_info(screen, id, info); +} + void nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, struct nv50_query *q, unsigned result_offset) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h index 294c67de9a4..82ec6bd2d96 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -8,8 +8,19 @@ #define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) +struct nv50_hw_query; + +struct nv50_hw_query_funcs { + void (*destroy_query)(struct nv50_context *, struct nv50_hw_query *); + boolean (*begin_query)(struct nv50_context *, struct nv50_hw_query *); + void (*end_query)(struct nv50_context *, struct nv50_hw_query *); + boolean (*get_query_result)(struct nv50_context *, struct nv50_hw_query *, + boolean, union pipe_query_result *); +}; + struct nv50_hw_query { struct nv50_query base; + const struct nv50_hw_query_funcs *funcs; uint32_t *data; uint32_t sequence; struct nouveau_bo *bo; @@ -31,6 +42,11 @@ nv50_hw_query(struct nv50_query *q) struct nv50_query * nv50_hw_create_query(struct nv50_context *, unsigned, unsigned); +int +nv50_hw_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +bool +nv50_hw_query_allocate(struct nv50_context *, struct nv50_query *, int); void nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, struct nv50_query *, unsigned); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c new file mode 100644 index 00000000000..e75b428fb12 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c @@ -0,0 +1,417 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define NV50_PUSH_EXPLICIT_SPACE_CHECKING + +#include "nv50/nv50_context.h" +#include "nv50/nv50_query_hw_sm.h" + +#include "nv_object.xml.h" +#include "nv50/nv50_compute.xml.h" + +/* === PERFORMANCE MONITORING COUNTERS for NV84+ === */ + +/* NOTE: intentionally using the same names as NV */ +static const char *nv50_hw_sm_query_names[] = +{ + "branch", + "divergent_branch", + "instructions", + "prof_trigger_00", + "prof_trigger_01", + "prof_trigger_02", + "prof_trigger_03", + "prof_trigger_04", + "prof_trigger_05", + "prof_trigger_06", + "prof_trigger_07", + "sm_cta_launched", + "warp_serialize", +}; + +static const uint64_t nv50_read_hw_sm_counters_code[] = +{ + /* and b32 $r0 $r0 0x0000ffff + * add b32 $c0 $r0 $r0 $r0 + * (lg $c0) ret + * mov $r0 $pm0 + * mov $r1 $pm1 + * mov $r2 $pm2 + * mov $r3 $pm3 + * mov $r4 $physid + * ld $r5 b32 s[0x10] + * ld $r6 b32 s[0x14] + * and b32 $r4 $r4 0x000f0000 + * shr u32 $r4 $r4 0x10 + * mul $r4 u24 $r4 0x14 + * add b32 $r5 $r5 $r4 + * st b32 g15[$r5] $r0 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r1 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r2 + * add b32 $r5 $r5 0x04 + * st b32 g15[$r5] $r3 + * add b32 $r5 $r5 0x04 + * exit st b32 g15[$r5] $r6 */ + 0x00000fffd03f0001ULL, + 0x040007c020000001ULL, + 0x0000028030000003ULL, + 0x6001078000000001ULL, + 0x6001478000000005ULL, + 0x6001878000000009ULL, + 0x6001c7800000000dULL, + 0x6000078000000011ULL, + 0x4400c78010000815ULL, + 0x4400c78010000a19ULL, + 0x0000f003d0000811ULL, + 0xe410078030100811ULL, + 0x0000000340540811ULL, + 0x0401078020000a15ULL, + 0xa0c00780d00f0a01ULL, + 0x0000000320048a15ULL, + 0xa0c00780d00f0a05ULL, + 0x0000000320048a15ULL, + 0xa0c00780d00f0a09ULL, + 0x0000000320048a15ULL, + 0xa0c00780d00f0a0dULL, + 0x0000000320048a15ULL, + 0xa0c00781d00f0a19ULL, +}; + +struct nv50_hw_sm_counter_cfg +{ + uint32_t mode : 4; /* LOGOP, LOGOP_PULSE */ + uint32_t unit : 8; /* UNK[0-5] */ + uint32_t sig : 8; /* signal selection */ +}; + +struct nv50_hw_sm_query_cfg +{ + struct nv50_hw_sm_counter_cfg ctr[4]; + uint8_t num_counters; +}; + +#define _Q(n, m, u, s) [NV50_HW_SM_QUERY_##n] = { { { NV50_COMPUTE_MP_PM_CONTROL_MODE_##m, NV50_COMPUTE_MP_PM_CONTROL_UNIT_##u, s, }, {}, {}, {} }, 1 } + +/* ==== Compute capability 1.1 (G84+) ==== */ +static const struct nv50_hw_sm_query_cfg sm11_hw_sm_queries[] = +{ + _Q(BRANCH, LOGOP, UNK4, 0x02), + _Q(DIVERGENT_BRANCH, LOGOP, UNK4, 0x09), + _Q(INSTRUCTIONS, LOGOP, UNK4, 0x04), + _Q(PROF_TRIGGER_0, LOGOP, UNK1, 0x26), + _Q(PROF_TRIGGER_1, LOGOP, UNK1, 0x27), + _Q(PROF_TRIGGER_2, LOGOP, UNK1, 0x28), + _Q(PROF_TRIGGER_3, LOGOP, UNK1, 0x29), + _Q(PROF_TRIGGER_4, LOGOP, UNK1, 0x2a), + _Q(PROF_TRIGGER_5, LOGOP, UNK1, 0x2b), + _Q(PROF_TRIGGER_6, LOGOP, UNK1, 0x2c), + _Q(PROF_TRIGGER_7, LOGOP, UNK1, 0x2d), + _Q(SM_CTA_LAUNCHED, LOGOP, UNK1, 0x33), + _Q(WARP_SERIALIZE, LOGOP, UNK0, 0x0b), +}; + +static inline uint16_t nv50_hw_sm_get_func(uint8_t slot) +{ + switch (slot) { + case 0: return 0xaaaa; + case 1: return 0xcccc; + case 2: return 0xf0f0; + case 3: return 0xff00; + } + return 0; +} + +static const struct nv50_hw_sm_query_cfg * +nv50_hw_sm_query_get_cfg(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + return &sm11_hw_sm_queries[q->type - NV50_HW_SM_QUERY(0)]; +} + +static void +nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_query *q = &hq->base; + q->funcs->destroy_query(nv50, q); +} + +static boolean +nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + const struct nv50_hw_sm_query_cfg *cfg; + uint16_t func; + int i, c; + + cfg = nv50_hw_sm_query_get_cfg(nv50, hq); + + /* check if we have enough free counter slots */ + if (screen->pm.num_hw_sm_active + cfg->num_counters > 4) { + NOUVEAU_ERR("Not enough free MP counter slots !\n"); + return false; + } + + assert(cfg->num_counters <= 4); + PUSH_SPACE(push, 4 * 4); + + /* set sequence field to 0 (used to check if result is available) */ + for (i = 0; i < screen->MPsInTP; ++i) { + const unsigned b = (0x14 / 4) * i; + hq->data[b + 16] = 0; + } + hq->sequence++; + + for (i = 0; i < cfg->num_counters; i++) { + screen->pm.num_hw_sm_active++; + + /* find free counter slots */ + for (c = 0; c < 4; ++c) { + if (!screen->pm.mp_counter[c]) { + hsq->ctr[i] = c; + screen->pm.mp_counter[c] = hsq; + break; + } + } + + /* select func to aggregate counters */ + func = nv50_hw_sm_get_func(c); + + /* configure and reset the counter(s) */ + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) + | cfg->ctr[i].unit | cfg->ctr[i].mode); + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1); + PUSH_DATA (push, 0); + } + return true; +} + +static void +nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) +{ + struct nv50_screen *screen = nv50->screen; + struct pipe_context *pipe = &nv50->base.pipe; + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + uint32_t mask; + uint32_t input[3]; + const uint block[3] = { 32, 1, 1 }; + const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 }; + int c; + + if (unlikely(!screen->pm.prog)) { + struct nv50_program *prog = CALLOC_STRUCT(nv50_program); + prog->type = PIPE_SHADER_COMPUTE; + prog->translated = true; + prog->max_gpr = 7; + prog->parm_size = 8; + prog->code = (uint32_t *)nv50_read_hw_sm_counters_code; + prog->code_size = sizeof(nv50_read_hw_sm_counters_code); + screen->pm.prog = prog; + } + + /* disable all counting */ + PUSH_SPACE(push, 8); + for (c = 0; c < 4; c++) { + if (screen->pm.mp_counter[c]) { + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + PUSH_DATA (push, 0); + } + } + + /* release counters for this query */ + for (c = 0; c < 4; c++) { + if (screen->pm.mp_counter[c] == hsq) { + screen->pm.num_hw_sm_active--; + screen->pm.mp_counter[c] = NULL; + } + } + + BCTX_REFN_bo(nv50->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR, + hq->bo); + + PUSH_SPACE(push, 2); + BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + + pipe->bind_compute_state(pipe, screen->pm.prog); + input[0] = hq->bo->offset + hq->base_offset; + input[1] = hq->sequence; + pipe->launch_grid(pipe, block, grid, 0, input); + + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY); + + /* re-active other counters */ + PUSH_SPACE(push, 8); + mask = 0; + for (c = 0; c < 4; c++) { + const struct nv50_hw_sm_query_cfg *cfg; + unsigned i; + + hsq = screen->pm.mp_counter[c]; + if (!hsq) + continue; + + cfg = nv50_hw_sm_query_get_cfg(nv50, &hsq->base); + for (i = 0; i < cfg->num_counters; i++) { + uint16_t func; + + if (mask & (1 << hsq->ctr[i])) + break; + + mask |= 1 << hsq->ctr[i]; + func = nv50_hw_sm_get_func(hsq->ctr[i]); + + BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1); + PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) + | cfg->ctr[i].unit | cfg->ctr[i].mode); + } + } +} + +static inline bool +nv50_hw_sm_query_read_data(uint32_t count[32][4], + struct nv50_context *nv50, bool wait, + struct nv50_hw_query *hq, + const struct nv50_hw_sm_query_cfg *cfg, + unsigned mp_count) +{ + struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + unsigned p, c; + + for (p = 0; p < mp_count; ++p) { + const unsigned b = (0x14 / 4) * p; + + for (c = 0; c < cfg->num_counters; ++c) { + if (hq->data[b + 4] != hq->sequence) { + if (!wait) + return false; + if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->base.client)) + return false; + } + count[p][c] = hq->data[b + hsq->ctr[c]]; + } + } + return true; +} + +static boolean +nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq, + boolean wait, union pipe_query_result *result) +{ + uint32_t count[32][4]; + uint64_t value = 0; + unsigned mp_count = MIN2(nv50->screen->MPsInTP, 32); + unsigned p, c; + const struct nv50_hw_sm_query_cfg *cfg; + bool ret; + + cfg = nv50_hw_sm_query_get_cfg(nv50, hq); + + ret = nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count); + if (!ret) + return false; + + for (c = 0; c < cfg->num_counters; ++c) + for (p = 0; p < mp_count; ++p) + value += count[p][c]; + + /* We only count a single TP, and simply multiply by the total number of + * TPs to compute result over all TPs. This is inaccurate, but enough! */ + value *= nv50->screen->TPs; + + *(uint64_t *)result = value; + return true; +} + +static const struct nv50_hw_query_funcs hw_sm_query_funcs = { + .destroy_query = nv50_hw_sm_destroy_query, + .begin_query = nv50_hw_sm_begin_query, + .end_query = nv50_hw_sm_end_query, + .get_query_result = nv50_hw_sm_get_query_result, +}; + +struct nv50_hw_query * +nv50_hw_sm_create_query(struct nv50_context *nv50, unsigned type) +{ + struct nv50_hw_sm_query *hsq; + struct nv50_hw_query *hq; + unsigned space; + + if (type < NV50_HW_SM_QUERY(0) || type > NV50_HW_SM_QUERY_LAST) + return NULL; + + hsq = CALLOC_STRUCT(nv50_hw_sm_query); + if (!hsq) + return NULL; + + hq = &hsq->base; + hq->funcs = &hw_sm_query_funcs; + hq->base.type = type; + + /* + * for each MP: + * [00] = MP.C0 + * [04] = MP.C1 + * [08] = MP.C2 + * [0c] = MP.C3 + * [10] = MP.sequence + */ + space = (4 + 1) * nv50->screen->MPsInTP * sizeof(uint32_t); + + if (!nv50_hw_query_allocate(nv50, &hq->base, space)) { + FREE(hq); + return NULL; + } + + return hq; +} + +int +nv50_hw_sm_get_driver_query_info(struct nv50_screen *screen, unsigned id, + struct pipe_driver_query_info *info) +{ + int count = 0; + + if (screen->compute) + if (screen->base.class_3d >= NV84_3D_CLASS) + count += NV50_HW_SM_QUERY_COUNT; + + if (!info) + return count; + + if (id < count) { + if (screen->compute) { + if (screen->base.class_3d >= NV84_3D_CLASS) { + info->name = nv50_hw_sm_query_names[id]; + info->query_type = NV50_HW_SM_QUERY(id); + info->group_id = -1; + return 1; + } + } + } + return 0; +} diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h new file mode 100644 index 00000000000..c1a1cd175e3 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.h @@ -0,0 +1,45 @@ +#ifndef __NV50_QUERY_HW_SM_H__ +#define __NV50_QUERY_HW_SM_H__ + +#include "nv50_query_hw.h" + +struct nv50_hw_sm_query { + struct nv50_hw_query base; + uint8_t ctr[4]; +}; + +static inline struct nv50_hw_sm_query * +nv50_hw_sm_query(struct nv50_hw_query *hq) +{ + return (struct nv50_hw_sm_query *)hq; +} + +/* + * Performance counter queries: + */ +#define NV50_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i)) +#define NV50_HW_SM_QUERY_LAST NV50_HW_SM_QUERY(NV50_HW_SM_QUERY_COUNT - 1) +enum nv50_hw_sm_queries +{ + NV50_HW_SM_QUERY_BRANCH = 0, + NV50_HW_SM_QUERY_DIVERGENT_BRANCH, + NV50_HW_SM_QUERY_INSTRUCTIONS, + NV50_HW_SM_QUERY_PROF_TRIGGER_0, + NV50_HW_SM_QUERY_PROF_TRIGGER_1, + NV50_HW_SM_QUERY_PROF_TRIGGER_2, + NV50_HW_SM_QUERY_PROF_TRIGGER_3, + NV50_HW_SM_QUERY_PROF_TRIGGER_4, + NV50_HW_SM_QUERY_PROF_TRIGGER_5, + NV50_HW_SM_QUERY_PROF_TRIGGER_6, + NV50_HW_SM_QUERY_PROF_TRIGGER_7, + NV50_HW_SM_QUERY_SM_CTA_LAUNCHED, + NV50_HW_SM_QUERY_WARP_SERIALIZE, + NV50_HW_SM_QUERY_COUNT, +}; + +struct nv50_hw_query * +nv50_hw_sm_create_query(struct nv50_context *, unsigned); +int +nv50_hw_sm_get_driver_query_info(struct nv50_screen *, unsigned, + struct pipe_driver_query_info *); +#endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 0142e86ba20..4e7201d7dd9 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -789,6 +789,7 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->get_compute_param = nv50_screen_get_compute_param; + pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; nv50_screen_init_resource_functions(pscreen); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 153ceea7a4f..c2a16d8bd1d 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -95,6 +95,12 @@ struct nv50_screen { struct nouveau_bo *bo; } fence; + struct { + struct nv50_program *prog; /* compute state object to read MP counters */ + struct nv50_hw_sm_query *mp_counter[4]; /* counter to query allocation */ + uint8_t num_hw_sm_active; + } pm; + struct nouveau_object *sync; struct nouveau_object *tesla; @@ -109,6 +115,9 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_info *); + bool nv50_blitter_create(struct nv50_screen *); void nv50_blitter_destroy(struct nv50_screen *); -- 2.11.0