2 * Copyright 2012 Francisco Jerez
3 * Copyright 2015 Samuel Pitoiset
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "nv50/nv50_context.h"
28 #include "nv50/nv50_compute.xml.h"
30 #include "codegen/nv50_ir_driver.h"
/* One-time compute engine setup for the screen: create the compute class
 * object on the channel and emit the static initial state — DMA objects,
 * stack/local/TIC/TSC base addresses and the 16 global buffer slots.
 * NOTE(review): this view of the function is missing several lines
 * (braces, case labels and some PUSH_DATA values); comments below only
 * describe what the visible code shows.
 */
33 nv50_screen_compute_setup(struct nv50_screen *screen,
34 struct nouveau_pushbuf *push)
36 struct nouveau_device *dev = screen->base.device;
37 struct nouveau_object *chan = screen->base.channel;
38 struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
/* Select the compute object class from the chipset: the chipsets listed
 * in the inner switch get NVA3_COMPUTE_CLASS, the others visible here
 * get NV50_COMPUTE_CLASS; unsupported chipsets error out. */
42 switch (dev->chipset & 0xf0) {
46 obj_class = NV50_COMPUTE_CLASS;
49 switch (dev->chipset) {
53 obj_class = NVA3_COMPUTE_CLASS;
56 obj_class = NV50_COMPUTE_CLASS;
61 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
/* Instantiate the compute object (handle 0xbeef50c0) on the channel. */
65 ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
/* Bind the new object to the compute subchannel. */
70 BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
71 PUSH_DATA (push, screen->compute->handle);
73 BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
/* Stack buffer: DMA object in VRAM, base address taken from stack_bo. */
75 BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
76 PUSH_DATA (push, fifo->vram);
77 BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
78 PUSH_DATAh(push, screen->stack_bo->offset);
79 PUSH_DATA (push, screen->stack_bo->offset);
80 BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
83 BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
85 BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
87 BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
88 PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
89 BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
90 PUSH_DATA (push, 0x100);
/* Global memory accesses go through a VRAM DMA object. */
91 BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
92 PUSH_DATA (push, fifo->vram);
/* Initialize global buffer slots 0..14 to linear mode; slot 15 is
 * programmed separately below with an unlimited (~0) limit. */
94 for (i = 0; i < 15; i++) {
95 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
98 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
100 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
101 PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
104 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
107 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
108 PUSH_DATA (push, ~0);
109 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
110 PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
/* Warp allocation limits for local and stack memory (values on lines
 * not visible in this view). */
112 BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
114 BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
116 BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
118 BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
120 BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
/* Texture state: TIC (image control) table at txc + 0 and TSC (sampler)
 * table at txc + 64 KiB, both reached through VRAM DMA objects. */
123 BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
124 PUSH_DATA (push, fifo->vram);
125 BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
126 PUSH_DATA (push, 0x54);
127 BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
130 BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
131 PUSH_DATA (push, fifo->vram);
132 BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
133 PUSH_DATAh(push, screen->txc->offset);
134 PUSH_DATA (push, screen->txc->offset);
135 PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
137 BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
138 PUSH_DATA (push, fifo->vram);
139 BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
140 PUSH_DATAh(push, screen->txc->offset + 65536);
141 PUSH_DATA (push, screen->txc->offset + 65536);
142 PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
/* Code/constbuf DMA object, also in VRAM. */
144 BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
145 PUSH_DATA (push, fifo->vram);
/* Per-thread local memory: based at tls_bo + 64 KiB, size given as a
 * log2 derived from the screen's max TLS space. */
147 BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
148 PUSH_DATA (push, fifo->vram);
149 BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
150 PUSH_DATAh(push, screen->tls_bo->offset + 65536);
151 PUSH_DATA (push, screen->tls_bo->offset + 65536);
152 BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
153 PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
/* Ensure the bound compute program is translated and its code uploaded.
 * Presumably returns a boolean-like success value — callers negate it in
 * nv50_launch_grid; the return statements are on lines missing from this
 * view, so confirm against the full file. */
159 nv50_compute_validate_program(struct nv50_context *nv50)
161 struct nv50_program *prog = nv50->compprog;
/* Translate on first use for this device's chipset; failure leaves
 * prog->translated false. */
166 if (!prog->translated) {
167 prog->translated = nv50_program_translate(
168 prog, nv50->screen->base.device->chipset, &nv50->base.debug);
169 if (!prog->translated)
172 if (unlikely(!prog->code_size))
/* Upload the code; when nv50_program_upload_code() reports a (re)upload,
 * flush the on-chip code/constbuf cache so the engine sees the new code. */
175 if (likely(prog->code_size)) {
176 if (nv50_program_upload_code(nv50, prog)) {
177 struct nouveau_pushbuf *push = nv50->base.pushbuf;
178 BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
/* Re-add every resident global buffer to the compute bufctx so the
 * buffers are referenced read/write by subsequent compute pushbufs. */
187 nv50_compute_validate_globals(struct nv50_context *nv50)
/* Iterate the dynarray of pipe_resource pointers; element count is the
 * byte size divided by the pointer size. */
191 for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
193 struct pipe_resource *res = *util_dynarray_element(
194 &nv50->global_residents, struct pipe_resource *, i);
196 nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
197 nv04_resource(res), NOUVEAU_BO_RDWR);
/* Validate all compute state before a grid launch: program first, then
 * (when dirty) global buffers; finally fence the compute bufctx and
 * validate the pushbuf against it. */
202 nv50_compute_state_validate(struct nv50_context *nv50)
204 if (!nv50_compute_validate_program(nv50))
207 if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS)
208 nv50_compute_validate_globals(nv50);
210 /* TODO: validate textures, samplers, surfaces */
212 nv50_bufctx_fence(nv50->bufctx_cp, false);
214 nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp);
215 if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf)))
/* If validation caused a flush, fence the bufctx again with the
 * on-flush flag set. */
217 if (unlikely(nv50->state.flushed))
218 nv50_bufctx_fence(nv50->bufctx_cp, true);
/* Upload the kernel's input parameters: stage them in a temporary GART
 * scratch buffer, then copy them into the USER_PARAM method space with
 * an inline pushbuf data transfer. */
224 nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
226 struct nv50_screen *screen = nv50->screen;
227 struct nouveau_pushbuf *push = screen->base.pushbuf;
/* Round the program's parameter block up to a dword multiple. */
228 unsigned size = align(nv50->compprog->parm_size, 0x4);
/* The dword count is placed in bits 8.. of USER_PARAM_COUNT. */
230 BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
231 PUSH_DATA (push, (size / 4) << 8);
234 struct nouveau_mm_allocation *mm;
235 struct nouveau_bo *bo = NULL;
238 mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
/* NOTE(review): the nouveau_bo_map() return value is not checked before
 * the memcpy below dereferences bo->map. */
241 nouveau_bo_map(bo, 0, screen->base.client);
242 memcpy(bo->map + offset, input, size);
/* Reference the scratch bo (GART, read-only) so pushbuf validation
 * covers it before the inline data transfer. */
244 nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
245 nouveau_pushbuf_bufctx(push, nv50->bufctx);
246 nouveau_pushbuf_validate(push);
248 BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4);
249 nouveau_pushbuf_data(push, bo, offset, size);
/* Defer freeing the scratch allocation until the current fence signals;
 * drop our bo reference and bufctx bucket immediately. */
251 nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
252 nouveau_bo_ref(NULL, &bo);
253 nouveau_bufctx_reset(nv50->bufctx, 0);
/* Resolve a symbol label to its code address within the uploaded compute
 * program by linear search of the program's symbol table; falls back to
 * the program's base address when the label is not found. */
258 nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
260 struct nv50_program *prog = nv50->compprog;
261 const struct nv50_ir_prog_symbol *syms =
262 (const struct nv50_ir_prog_symbol *)prog->cp.syms;
265 for (i = 0; i < prog->cp.num_syms; ++i) {
266 if (syms[i].label == label)
267 return prog->code_base + syms[i].offset;
269 return prog->code_base; /* no symbols or symbol not found */
/* pipe_context::launch_grid implementation for nv50: validate compute
 * state, upload the kernel's input parameters, program the grid/block
 * dimensions and kick off the kernel. */
273 nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
275 struct nv50_context *nv50 = nv50_context(pipe);
276 struct nouveau_pushbuf *push = nv50->base.pushbuf;
/* Total threads per block across all three dimensions. */
277 unsigned block_size = info->block[0] * info->block[1] * info->block[2];
278 struct nv50_program *cp = nv50->compprog;
281 ret = !nv50_compute_state_validate(nv50);
283 NOUVEAU_ERR("Failed to launch grid !\n");
287 nv50_compute_upload_input(nv50, info->input);
/* Entry point of the kernel, resolved from info->pc via the program's
 * symbol table. */
289 BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
290 PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
/* Shared memory: kernel shared size plus the user-parameter area
 * (plus 0x10), rounded up to 64 bytes. */
292 BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
293 PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
294 BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
295 PUSH_DATA (push, cp->max_gpr);
297 /* grid/block setup */
298 BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
299 PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
300 PUSH_DATA (push, info->block[2]);
301 BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
302 PUSH_DATA (push, 1 << 16 | block_size);
303 BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
305 BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
306 PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
307 BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
310 /* kernel launching */
311 BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
/* Serialize afterwards so later work observes the launch. */
313 BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
316 /* binding a compute shader clobbers fragment shader state */
317 nv50->dirty_3d |= NV50_NEW_FRAGPROG;