src/gallium/drivers/nouveau/nv50/nv50_query_hw.c

   1 /*
   2  * Copyright 2011 Christoph Bumiller
   3  * Copyright 2015 Samuel Pitoiset
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in
  13  * all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21  * OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #define NV50_PUSH_EXPLICIT_SPACE_CHECKING
  25
  26 #include "nv50/nv50_context.h"
  27 #include "nv50/nv50_query_hw.h"
  28 #include "nv50/nv50_query_hw_metric.h"
  29 #include "nv50/nv50_query_hw_sm.h"
  30 #include "nv_object.xml.h"
  31
  32 #define NV50_HW_QUERY_STATE_READY   0
  33 #define NV50_HW_QUERY_STATE_ACTIVE  1
  34 #define NV50_HW_QUERY_STATE_ENDED   2
  35 #define NV50_HW_QUERY_STATE_FLUSHED 3
  36
  37 /* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
  38  * (since we use only a single GPU channel per screen) will not work properly.
  39  *
  40  * The first is not that big of an issue because OpenGL does not allow nested
  41  * queries anyway.
  42  */
  43
  44 #define NV50_HW_QUERY_ALLOC_SPACE 256
  45
  46 bool
  47 nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
  48                        int size)
  49 {
  50    struct nv50_screen *screen = nv50->screen;
  51    struct nv50_hw_query *hq = nv50_hw_query(q);
  52    int ret;
  53
  54    if (hq->bo) {
  55       nouveau_bo_ref(NULL, &hq->bo);
  56       if (hq->mm) {
  57          if (hq->state == NV50_HW_QUERY_STATE_READY)
  58             nouveau_mm_free(hq->mm);
  59          else
  60             nouveau_fence_work(screen->base.fence.current,
  61                                nouveau_mm_free_work, hq->mm);
  62       }
  63    }
  64    if (size) {
  65       hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size,
  66                                    &hq->bo, &hq->base_offset);
  67       if (!hq->bo)
  68          return false;
  69       hq->offset = hq->base_offset;
  70
  71       ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
  72       if (ret) {
  73          nv50_hw_query_allocate(nv50, q, 0);
  74          return false;
  75       }
  76       hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
  77    }
  78    return true;
  79 }
  80
  81 static void
  82 nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
  83                unsigned offset, uint32_t get)
  84 {
  85    struct nv50_hw_query *hq = nv50_hw_query(q);
  86
  87    offset += hq->offset;
  88
  89    PUSH_SPACE(push, 5);
  90    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
  91    BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
  92    PUSH_DATAh(push, hq->bo->offset + offset);
  93    PUSH_DATA (push, hq->bo->offset + offset);
  94    PUSH_DATA (push, hq->sequence);
  95    PUSH_DATA (push, get);
  96 }
  97
  98 static inline void
  99 nv50_hw_query_update(struct nv50_query *q)
 100 {
 101    struct nv50_hw_query *hq = nv50_hw_query(q);
 102
 103    if (hq->is64bit) {
 104       if (nouveau_fence_signalled(hq->fence))
 105          hq->state = NV50_HW_QUERY_STATE_READY;
 106    } else {
 107       if (hq->data[0] == hq->sequence)
 108          hq->state = NV50_HW_QUERY_STATE_READY;
 109    }
 110 }
 111
 112 static void
 113 nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
 114 {
 115    struct nv50_hw_query *hq = nv50_hw_query(q);
 116
 117    if (hq->funcs && hq->funcs->destroy_query) {
 118       hq->funcs->destroy_query(nv50, hq);
 119       return;
 120    }
 121
 122    nv50_hw_query_allocate(nv50, q, 0);
 123    nouveau_fence_ref(NULL, &hq->fence);
 124    FREE(hq);
 125 }
 126
 127 static boolean
 128 nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
 129 {
 130    struct nouveau_pushbuf *push = nv50->base.pushbuf;
 131    struct nv50_hw_query *hq = nv50_hw_query(q);
 132    bool ret = true;
 133
 134    if (hq->funcs && hq->funcs->begin_query)
 135       return hq->funcs->begin_query(nv50, hq);
 136
 137    /* For occlusion queries we have to change the storage, because a previous
 138     * query might set the initial render condition to false even *after* we re-
 139     * initialized it to true.
 140     */
 141    if (hq->rotate) {
 142       hq->offset += hq->rotate;
 143       hq->data += hq->rotate / sizeof(*hq->data);
 144       if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
 145          nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
 146
 147       /* XXX: can we do this with the GPU, and sync with respect to a previous
 148        *  query ?
 149        */
 150       hq->data[0] = hq->sequence; /* initialize sequence */
 151       hq->data[1] = 1; /* initial render condition = true */
 152       hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
 153       hq->data[5] = 0;
 154    }
 155    if (!hq->is64bit)
 156       hq->data[0] = hq->sequence++; /* the previously used one */
 157
 158    pipe_mutex_lock(nv50->screen->base.push_mutex);
 159    switch (q->type) {
 160    case PIPE_QUERY_OCCLUSION_COUNTER:
 161    case PIPE_QUERY_OCCLUSION_PREDICATE:
 162       hq->nesting = nv50->screen->num_occlusion_queries_active++;
 163       if (hq->nesting) {
 164          nv50_hw_query_get(push, q, 0x10, 0x0100f002);
 165       } else {
 166          PUSH_SPACE(push, 4);
 167          BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
 168          PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
 169          BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
 170          PUSH_DATA (push, 1);
 171       }
 172       break;
 173    case PIPE_QUERY_PRIMITIVES_GENERATED:
 174       nv50_hw_query_get(push, q, 0x10, 0x06805002);
 175       break;
 176    case PIPE_QUERY_PRIMITIVES_EMITTED:
 177       nv50_hw_query_get(push, q, 0x10, 0x05805002);
 178       break;
 179    case PIPE_QUERY_SO_STATISTICS:
 180       nv50_hw_query_get(push, q, 0x20, 0x05805002);
 181       nv50_hw_query_get(push, q, 0x30, 0x06805002);
 182       break;
 183    case PIPE_QUERY_PIPELINE_STATISTICS:
 184       nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
 185       nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
 186       nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
 187       nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
 188       nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
 189       nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
 190       nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
 191       nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
 192       break;
 193    case PIPE_QUERY_TIME_ELAPSED:
 194       nv50_hw_query_get(push, q, 0x10, 0x00005002);
 195       break;
 196    default:
 197       assert(0);
 198       ret = false;
 199       break;
 200    }
 201    pipe_mutex_unlock(nv50->screen->base.push_mutex);
 202    if (ret)
 203       hq->state = NV50_HW_QUERY_STATE_ACTIVE;
 204    return ret;
 205 }
 206
 207 static void
 208 nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
 209 {
 210    struct nouveau_pushbuf *push = nv50->base.pushbuf;
 211    struct nv50_hw_query *hq = nv50_hw_query(q);
 212
 213    if (hq->funcs && hq->funcs->end_query) {
 214       hq->funcs->end_query(nv50, hq);
 215       return;
 216    }
 217
 218    hq->state = NV50_HW_QUERY_STATE_ENDED;
 219
 220    pipe_mutex_lock(nv50->screen->base.push_mutex);
 221    switch (q->type) {
 222    case PIPE_QUERY_OCCLUSION_COUNTER:
 223    case PIPE_QUERY_OCCLUSION_PREDICATE:
 224       nv50_hw_query_get(push, q, 0, 0x0100f002);
 225       if (--nv50->screen->num_occlusion_queries_active == 0) {
 226          PUSH_SPACE(push, 2);
 227          BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
 228          PUSH_DATA (push, 0);
 229       }
 230       break;
 231    case PIPE_QUERY_PRIMITIVES_GENERATED:
 232       nv50_hw_query_get(push, q, 0, 0x06805002);
 233       break;
 234    case PIPE_QUERY_PRIMITIVES_EMITTED:
 235       nv50_hw_query_get(push, q, 0, 0x05805002);
 236       break;
 237    case PIPE_QUERY_SO_STATISTICS:
 238       nv50_hw_query_get(push, q, 0x00, 0x05805002);
 239       nv50_hw_query_get(push, q, 0x10, 0x06805002);
 240       break;
 241    case PIPE_QUERY_PIPELINE_STATISTICS:
 242       nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
 243       nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
 244       nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
 245       nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
 246       nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
 247       nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
 248       nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
 249       nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
 250       break;
 251    case PIPE_QUERY_TIMESTAMP:
 252       hq->sequence++;
 253       /* fall through */
 254    case PIPE_QUERY_TIME_ELAPSED:
 255       nv50_hw_query_get(push, q, 0, 0x00005002);
 256       break;
 257    case PIPE_QUERY_GPU_FINISHED:
 258       hq->sequence++;
 259       nv50_hw_query_get(push, q, 0, 0x1000f010);
 260       break;
 261    case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
 262       hq->sequence++;
 263       nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
 264       break;
 265    case PIPE_QUERY_TIMESTAMP_DISJOINT:
 266       /* This query is not issued on GPU because disjoint is forced to false */
 267       hq->state = NV50_HW_QUERY_STATE_READY;
 268       break;
 269    default:
 270       assert(0);
 271       break;
 272    }
 273    pipe_mutex_unlock(nv50->screen->base.push_mutex);
 274    if (hq->is64bit)
 275       nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
 276 }
 277
 278 static boolean
 279 nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
 280                          boolean wait, union pipe_query_result *result)
 281 {
 282    struct nv50_hw_query *hq = nv50_hw_query(q);
 283    uint64_t *res64 = (uint64_t *)result;
 284    uint32_t *res32 = (uint32_t *)result;
 285    uint8_t *res8 = (uint8_t *)result;
 286    uint64_t *data64 = (uint64_t *)hq->data;
 287    int i;
 288
 289    if (hq->funcs && hq->funcs->get_query_result)
 290       return hq->funcs->get_query_result(nv50, hq, wait, result);
 291
 292    if (hq->state != NV50_HW_QUERY_STATE_READY)
 293       nv50_hw_query_update(q);
 294
 295    if (hq->state != NV50_HW_QUERY_STATE_READY) {
 296       pipe_mutex_lock(nv50->screen->base.push_mutex);
 297       if (!wait) {
 298          /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
 299          if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
 300             hq->state = NV50_HW_QUERY_STATE_FLUSHED;
 301             PUSH_KICK(nv50->base.pushbuf);
 302          }
 303          pipe_mutex_unlock(nv50->screen->base.push_mutex);
 304          return false;
 305       }
 306       if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) {
 307          pipe_mutex_unlock(nv50->screen->base.push_mutex);
 308          return false;
 309       }
 310       pipe_mutex_unlock(nv50->screen->base.push_mutex);
 311    }
 312    hq->state = NV50_HW_QUERY_STATE_READY;
 313
 314    switch (q->type) {
 315    case PIPE_QUERY_GPU_FINISHED:
 316       res8[0] = true;
 317       break;
 318    case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
 319       res64[0] = hq->data[1] - hq->data[5];
 320       break;
 321    case PIPE_QUERY_OCCLUSION_PREDICATE:
 322       res8[0] = hq->data[1] != hq->data[5];
 323       break;
 324    case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
 325    case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
 326       res64[0] = data64[0] - data64[2];
 327       break;
 328    case PIPE_QUERY_SO_STATISTICS:
 329       res64[0] = data64[0] - data64[4];
 330       res64[1] = data64[2] - data64[6];
 331       break;
 332    case PIPE_QUERY_PIPELINE_STATISTICS:
 333       for (i = 0; i < 8; ++i)
 334          res64[i] = data64[i * 2] - data64[16 + i * 2];
 335       break;
 336    case PIPE_QUERY_TIMESTAMP:
 337       res64[0] = data64[1];
 338       break;
 339    case PIPE_QUERY_TIMESTAMP_DISJOINT:
 340       res64[0] = 1000000000;
 341       res8[8] = false;
 342       break;
 343    case PIPE_QUERY_TIME_ELAPSED:
 344       res64[0] = data64[1] - data64[3];
 345       break;
 346    case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
 347       res32[0] = hq->data[1];
 348       break;
 349    default:
 350       assert(0);
 351       return false;
 352    }
 353
 354    return true;
 355 }
 356
 357 static const struct nv50_query_funcs hw_query_funcs = {
 358    .destroy_query = nv50_hw_destroy_query,
 359    .begin_query = nv50_hw_begin_query,
 360    .end_query = nv50_hw_end_query,
 361    .get_query_result = nv50_hw_get_query_result,
 362 };
 363
 364 struct nv50_query *
 365 nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
 366 {
 367    struct nv50_hw_query *hq;
 368    struct nv50_query *q;
 369
 370    hq = nv50_hw_sm_create_query(nv50, type);
 371    if (hq) {
 372       hq->base.funcs = &hw_query_funcs;
 373       return (struct nv50_query *)hq;
 374    }
 375
 376    hq = nv50_hw_metric_create_query(nv50, type);
 377    if (hq) {
 378       hq->base.funcs = &hw_query_funcs;
 379       return (struct nv50_query *)hq;
 380    }
 381
 382    hq = CALLOC_STRUCT(nv50_hw_query);
 383    if (!hq)
 384       return NULL;
 385
 386    q = &hq->base;
 387    q->funcs = &hw_query_funcs;
 388    q->type = type;
 389
 390    switch (q->type) {
 391    case PIPE_QUERY_OCCLUSION_COUNTER:
 392    case PIPE_QUERY_OCCLUSION_PREDICATE:
 393       hq->rotate = 32;
 394       break;
 395    case PIPE_QUERY_PRIMITIVES_GENERATED:
 396    case PIPE_QUERY_PRIMITIVES_EMITTED:
 397    case PIPE_QUERY_SO_STATISTICS:
 398    case PIPE_QUERY_PIPELINE_STATISTICS:
 399       hq->is64bit = true;
 400       break;
 401    case PIPE_QUERY_TIME_ELAPSED:
 402    case PIPE_QUERY_TIMESTAMP:
 403    case PIPE_QUERY_TIMESTAMP_DISJOINT:
 404    case PIPE_QUERY_GPU_FINISHED:
 405    case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
 406       break;
 407    default:
 408       debug_printf("invalid query type: %u\n", type);
 409       FREE(q);
 410       return NULL;
 411    }
 412
 413    if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
 414       FREE(hq);
 415       return NULL;
 416    }
 417
 418    if (hq->rotate) {
 419       /* we advance before query_begin ! */
 420       hq->offset -= hq->rotate;
 421       hq->data -= hq->rotate / sizeof(*hq->data);
 422    }
 423
 424    return q;
 425 }
 426
 427 int
 428 nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,
 429                               struct pipe_driver_query_info *info)
 430 {
 431    int num_hw_sm_queries = 0, num_hw_metric_queries = 0;
 432
 433    num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);
 434    num_hw_metric_queries =
 435       nv50_hw_metric_get_driver_query_info(screen, 0, NULL);
 436
 437    if (!info)
 438       return num_hw_sm_queries + num_hw_metric_queries;
 439
 440    if (id < num_hw_sm_queries)
 441       return nv50_hw_sm_get_driver_query_info(screen, id, info);
 442
 443    return nv50_hw_metric_get_driver_query_info(screen,
 444                                                id - num_hw_sm_queries, info);
 445 }
 446
 447 void
 448 nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
 449                              struct nv50_query *q, unsigned result_offset)
 450 {
 451    struct nv50_hw_query *hq = nv50_hw_query(q);
 452
 453    nv50_hw_query_update(q);
 454    if (hq->state != NV50_HW_QUERY_STATE_READY)
 455       nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);
 456    hq->state = NV50_HW_QUERY_STATE_READY;
 457
 458    BEGIN_NV04(push, SUBC_3D(method), 1);
 459    PUSH_DATA (push, hq->data[result_offset / 4]);
 460 }
 461
 462 void
 463 nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
 464 {
 465    struct nv50_hw_query *hq = nv50_hw_query(q);
 466    unsigned offset = hq->offset;
 467
 468    PUSH_SPACE(push, 5);
 469    PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
 470    BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
 471    PUSH_DATAh(push, hq->bo->offset + offset);
 472    PUSH_DATA (push, hq->bo->offset + offset);
 473    PUSH_DATA (push, hq->sequence);
 474    PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
 475 }