return ret;
}
- ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
- &screen->parm);
- if (ret)
- return ret;
-
BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->compute->oclass);
if (view->resource) {
struct nv04_resource *res = nv04_resource(view->resource);
- nve4_set_surface_info(push, view, screen);
+ if (res->base.target == PIPE_BUFFER) {
+ if (view->access & PIPE_IMAGE_ACCESS_WRITE)
+ nvc0_mark_image_range_valid(view);
+ }
+
+ nve4_set_surface_info(push, view, nvc0);
BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
} else {
for (j = 0; j < 16; j++)
}
}
-/* Will be removed once images are completely done. */
-#if 0
-static void
-nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
-{
- struct nvc0_screen *screen = nvc0->screen;
- struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- struct nv50_surface *sf;
- struct nv04_resource *res;
- uint32_t mask;
- unsigned i;
- const unsigned t = 1;
- uint64_t address;
-
- address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
-
- mask = nvc0->surfaces_dirty[t];
- while (mask) {
- i = ffs(mask) - 1;
- mask &= ~(1 << i);
-
- /*
- * NVE4's surface load/store instructions receive all the information
- * directly instead of via binding points, so we have to supply them.
- */
- BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(i));
- PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(i));
- BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
- PUSH_DATA (push, 64);
- PUSH_DATA (push, 1);
- BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17);
- PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
-
- nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);
-
- sf = nv50_surface(nvc0->surfaces[t][i]);
- if (sf) {
- res = nv04_resource(sf->base.texture);
-
- if (sf->base.writable)
- BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
- else
- BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
- }
- }
- if (nvc0->surfaces_dirty[t]) {
- BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
- PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
- }
-
- /* re-reference non-dirty surfaces */
- mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t];
- while (mask) {
- i = ffs(mask) - 1;
- mask &= ~(1 << i);
-
- sf = nv50_surface(nvc0->surfaces[t][i]);
- res = nv04_resource(sf->base.texture);
-
- if (sf->base.writable)
- BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
- else
- BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
- }
-
- nvc0->surfaces_dirty[t] = 0;
-}
-#endif
-
/* Thankfully, textures with samplers follow the normal rules. */
static void
nve4_compute_validate_samplers(struct nvc0_context *nvc0)
BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);
PUSH_DATA (nvc0->base.pushbuf, 0);
}
+
+ /* Invalidate all 3D samplers because they are aliased. */
+ for (int s = 0; s < 5; s++)
+ nvc0->samplers_dirty[s] = ~0;
+ nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
}
+
/* (Code duplicated at bottom for various non-convincing reasons.
* E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
* entries to avoid a subchannel switch.
PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
PUSH_DATA (push, 0);
BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
+ util_range_add(&res->valid_buffer_range,
+ nvc0->buffers[s][i].buffer_offset,
+ nvc0->buffers[s][i].buffer_size);
} else {
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
static void
nve4_compute_upload_input(struct nvc0_context *nvc0,
- struct nve4_cp_launch_desc *desc,
const struct pipe_grid_info *info)
{
struct nvc0_screen *screen = nvc0->screen;
if (cp->parm_size) {
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->parm->offset);
- PUSH_DATA (push, screen->parm->offset);
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_USR_INFO(5));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_USR_INFO(5));
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
PUSH_DATA (push, cp->parm_size);
PUSH_DATA (push, 0x1);
BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
PUSH_DATAp(push, info->input, cp->parm_size / 4);
-
- /* Bind user parameters coming from clover. */
- /* TODO: This should be harmonized with uniform_bo. */
- assert(!(desc->cb_mask & (1 << 0)));
- nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, 1 << 12);
}
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
desc->blockdim_z = info->block[2];
desc->shared_size = align(cp->cp.smem_size, 0x100);
- desc->local_size_p = align(cp->cp.lmem_size, 0x10);
+ desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10);
desc->local_size_n = 0;
desc->cstack_size = 0x800;
desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);
desc->gpr_alloc = cp->num_gprs;
desc->bar_alloc = cp->num_barriers;
- // Only bind OpenGL uniforms and the driver constant buffer through the
+ // Only bind user uniforms and the driver constant buffer through the
// launch descriptor because UBOs are sticked to the driver cb to avoid the
// limitation of 8 CBs.
- if (nvc0->constbuf[5][0].user) {
+ if (nvc0->constbuf[5][0].user || cp->parm_size) {
nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
NVC0_CB_USR_INFO(5), 1 << 16);
}
nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nve4_cp_launch_desc *desc;
uint64_t desc_gpuaddr;
struct nouveau_bo *desc_bo;
int ret;
+ pipe_mutex_lock(screen->base.push_mutex);
+
desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
if (!desc) {
ret = -1;
nve4_compute_setup_launch_desc(nvc0, desc, info);
- nve4_compute_upload_input(nvc0, desc, info);
+ nve4_compute_upload_input(nvc0, info);
#ifdef DEBUG
if (debug_get_num_option("NV50_PROG_DEBUG", 0))
NOUVEAU_ERR("Failed to launch grid !\n");
nouveau_scratch_done(&nvc0->base);
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
+ pipe_mutex_unlock(screen->base.push_mutex);
}
continue;
}
res = nv04_resource(tic->pipe.texture);
+ nvc0_update_tic(nvc0, tic, res);
if (tic->id < 0) {
tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
if (dirty)
BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
}
- for (; i < nvc0->state.num_textures[s]; ++i)
+ for (; i < nvc0->state.num_textures[s]; ++i) {
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+ nvc0->textures_dirty[s] |= 1 << i;
+ }
if (n[0]) {
BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]);
}
nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+ /* Invalidate all 3D textures because they are aliased. */
+ for (int s = 0; s < 5; s++) {
+ for (int i = 0; i < nvc0->num_textures[s]; i++)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
+ nvc0->textures_dirty[s] = ~0;
+ }
+ nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
}