2 * Copyright 2013 Nouveau Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Christoph Bumiller, Samuel Pitoiset
25 #include "nvc0/nvc0_context.h"
27 #include "nvc0/nvc0_compute.xml.h"
/*
 * Create the compute object on the screen's channel and emit the initial
 * compute state into `push`: subchannel object binding, MP and call limits,
 * global memory setup, local memory (TLS) address/size, the shared/L1 cache
 * split, and the code, TIC and TSC base addresses.
 *
 * The object class is picked from dev->chipset with its low nibble masked
 * off.  An unhandled chipset and a failed nouveau_object_new() call are both
 * reported through NOUVEAU_ERR.
 *
 * NOTE(review): this view of the function has gaps (the return type, braces,
 * case labels, return statements and the payload of several BEGIN_NVC0
 * headers are not visible) -- confirm details against the complete file.
 */
30 nvc0_screen_compute_setup(struct nvc0_screen *screen,
31 struct nouveau_pushbuf *push)
33 struct nouveau_object *chan = screen->base.channel;
34 struct nouveau_device *dev = screen->base.device;
/* Dispatch on the chipset id with the low 4 bits cleared. */
39 switch (dev->chipset & ~0xf) {
42 /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
43 * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
44 obj_class = NVC0_COMPUTE_CLASS;
47 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
/* Create the compute object of the selected class on the channel. */
51 ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
54 NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
/* Bind the new object on the compute subchannel by pushing its class. */
58 BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
59 PUSH_DATA (push, screen->compute->oclass);
/* MP_LIMIT is programmed with the screen's MP count. */
62 BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
63 PUSH_DATA (push, screen->mp_count);
64 BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
65 PUSH_DATA (push, 0xf);
/* Raw method 0x02a0 has no symbolic name here; the meaning of 0x8000 is
 * not documented in this file. */
67 BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
68 PUSH_DATA (push, 0x8000);
70 /* global memory setup */
71 BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
/* Write 0x100 GLOBAL_BASE words, one per index i in [0, 0xff]; each word has
 * 0xc in its top nibble and carries i both at bit 16 and in the low bits.
 * The writes are bracketed by two writes to raw method 0x02c4 whose payloads
 * are not visible in this view. */
73 BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
74 for (i = 0; i <= 0xff; i++)
75 PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
76 BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
79 /* local memory and cstack setup */
/* Address and size are both taken from the screen's tls buffer object and
 * pushed as a high/low pair. */
80 BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
81 PUSH_DATAh(push, screen->tls->offset);
82 PUSH_DATA (push, screen->tls->offset);
83 BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
84 PUSH_DATAh(push, screen->tls->size);
85 PUSH_DATA (push, screen->tls->size);
86 BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
/* LOCAL_BASE gets 0xff in its top byte; SHARED_BASE below gets 0xfe. */
88 BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
89 PUSH_DATA (push, 0xff << 24);
91 /* shared memory setup */
92 BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
93 PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
94 BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
95 PUSH_DATA (push, 0xfe << 24);
96 BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
99 /* code segment setup */
100 BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
101 PUSH_DATAh(push, screen->text->offset);
102 PUSH_DATA (push, screen->text->offset);
/* TIC table: base at the start of the txc buffer, followed by the highest
 * valid entry index. */
105 BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
106 PUSH_DATAh(push, screen->txc->offset);
107 PUSH_DATA (push, screen->txc->offset);
108 PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
/* TSC table: lives in the same txc buffer, 65536 past the TIC base. */
111 BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
112 PUSH_DATAh(push, screen->txc->offset + 65536);
113 PUSH_DATA (push, screen->txc->offset + 65536);
114 PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
/*
 * Validate sampler (TSC) state by calling nvc0_validate_tsc() with index 5,
 * then emit TSC_FLUSH with value 0 on the compute subchannel.
 *
 * NOTE(review): index 5 is presumably the compute stage, and need_flush
 * presumably guards the flush -- neither the stage numbering nor the
 * condition is visible in this view; confirm.
 */
120 nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
122 bool need_flush = nvc0_validate_tsc(nvc0, 5);
124 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
125 PUSH_DATA (nvc0->base.pushbuf, 0);
/*
 * Validate texture (TIC) state by calling nvc0_validate_tic() with index 5,
 * then emit TIC_FLUSH with value 0 on the compute subchannel.
 *
 * NOTE(review): index 5 is presumably the compute stage, and need_flush
 * presumably guards the flush -- neither the stage numbering nor the
 * condition is visible in this view; confirm.
 */
130 nvc0_compute_validate_textures(struct nvc0_context *nvc0)
132 bool need_flush = nvc0_validate_tic(nvc0, 5);
134 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
135 PUSH_DATA (nvc0->base.pushbuf, 0);
/*
 * Re-emit every constant buffer whose bit is set in
 * nvc0->constbuf_dirty[s], then flush constant buffers.
 *
 * Two binding paths are visible: a "user" buffer is uploaded into the
 * screen's uniform_bo and bound to slot 0, while a resource-backed buffer is
 * bound in place to slot i and referenced in the compute buffer context.
 *
 * NOTE(review): the declaration of `s` and the branch structure (else arms,
 * braces, the conditions guarding the last statements of the loop) are not
 * visible in this view -- confirm against the complete file.
 */
140 nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
142 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
/* Take the lowest set dirty bit each iteration and clear it, so the loop
 * ends once every dirty slot has been handled. */
145 while (nvc0->constbuf_dirty[s]) {
146 int i = ffs(nvc0->constbuf_dirty[s]) - 1;
147 nvc0->constbuf_dirty[s] &= ~(1 << i);
/* User-memory path: data comes from u.data and is staged in uniform_bo at
 * NVC0_CB_USR_INFO(s). */
149 if (nvc0->constbuf[s][i].user) {
150 struct nouveau_bo *bo = nvc0->screen->uniform_bo;
151 const unsigned base = NVC0_CB_USR_INFO(s);
152 const unsigned size = nvc0->constbuf[s][0].size;
153 assert(i == 0); /* we really only want OpenGL uniforms here */
154 assert(nvc0->constbuf[s][0].u.data);
/* Only grow (and re-bind) the window when the currently bound size is
 * smaller than what is needed; the bound size is rounded up to 0x100. */
156 if (nvc0->state.uniform_buffer_bound[s] < size) {
157 nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
159 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
160 PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
161 PUSH_DATAh(push, bo->offset + base);
162 PUSH_DATA (push, bo->offset + base);
163 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
164 PUSH_DATA (push, (0 << 8) | 1);
/* Upload the user data into the staging buffer object. */
166 nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
167 base, nvc0->state.uniform_buffer_bound[s],
169 nvc0->constbuf[s][0].u.data);
/* Resource-backed path: the buffer is bound at its own GPU address plus the
 * constbuf offset. */
171 struct nv04_resource *res =
172 nv04_resource(nvc0->constbuf[s][i].u.buf);
174 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
175 PUSH_DATA (push, nvc0->constbuf[s][i].size);
176 PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
177 PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
/* CB_BIND word: slot index in bits 8 and up, low bit set. */
178 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
179 PUSH_DATA (push, (i << 8) | 1);
/* Reference the resource for reading in the compute buffer context and
 * record the slot in the resource's per-stage binding mask. */
181 BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
183 res->cb_bindings[s] |= 1 << i;
/* Same CB_BIND method with the low bit clear -- presumably the unbind path
 * for a slot without a resource; the guarding condition is not visible. */
185 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
186 PUSH_DATA (push, (i << 8) | 0);
/* Forget the tracked uniform window size so the user path re-binds later. */
189 nvc0->state.uniform_buffer_bound[s] = 0;
/* Flush constant buffers after the bindings were updated. */
193 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
194 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
/*
 * Bind the driver constant buffer for compute: a 2048-sized window located
 * at NVC0_CB_AUX_INFO(5) inside the screen's uniform_bo is bound to constant
 * buffer slot 15.
 *
 * Afterwards the 3D driver-constant state is flagged dirty.
 * NOTE(review): presumably because the binding is shared with the 3D engine
 * and must be re-emitted there -- confirm.
 */
198 nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
200 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
201 struct nvc0_screen *screen = nvc0->screen;
/* CB_SIZE takes three words: size, then the address as a high/low pair. */
203 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
204 PUSH_DATA (push, 2048);
205 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
206 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
/* CB_BIND word: slot 15 in bits 8 and up, low bit set. */
207 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
208 PUSH_DATA (push, (15 << 8) | 1);
210 nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST;
/*
 * Write the per-buffer information for the bound compute buffers into the
 * auxiliary constant buffer and reference each bound buffer for read/write
 * in the compute buffer context.
 *
 * The upload is announced as 1 + 4 * NVC0_MAX_BUFFERS words: one position
 * word followed by four words per buffer slot.
 *
 * NOTE(review): `s` and `i` are declared on lines not visible here, only
 * three of the four per-buffer words are visible, and the handling of empty
 * slots is not visible either -- confirm against the complete file.
 */
214 nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
216 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
217 struct nvc0_screen *screen = nvc0->screen;
/* Select the 2048-sized auxiliary window of stage s in uniform_bo. */
221 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
222 PUSH_DATA (push, 2048);
223 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
224 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
/* Start writing at the position of buffer slot 0 inside that window. */
225 BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
226 PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
228 for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
229 if (nvc0->buffers[s][i].buffer) {
230 struct nv04_resource *res =
231 nv04_resource(nvc0->buffers[s][i].buffer);
/* Unlike the CB_SIZE address pairs, the address here is pushed with
 * PUSH_DATA first and PUSH_DATAh second, followed by the buffer size. */
232 PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
233 PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
234 PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
236 BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
/*
 * Add every resource stored in nvc0->global_residents to the compute buffer
 * context as a read/write resident under the NVC0_BIND_CP_GLOBAL bin.
 *
 * NOTE(review): the declaration of `i` and the rest of the loop header are
 * not visible in this view.
 */
247 nvc0_compute_validate_globals(struct nvc0_context *nvc0)
/* The dynarray stores `struct pipe_resource *` elements, so the element
 * count is its byte size divided by the pointer size. */
251 for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
253 struct pipe_resource *res = *util_dynarray_element(
254 &nvc0->global_residents, struct pipe_resource *, i);
256 nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL,
257 nv04_resource(res), NOUVEAU_BO_RDWR);
/*
 * Compute state validation table: each entry pairs a validation function
 * with the NVC0_NEW_CP_* dirty flag it is associated with.  The table is
 * handed to nvc0_state_validate() by nvc0_state_validate_cp().
 *
 * NOTE(review): whether nvc0_state_validate() relies on the order of the
 * entries is not visible in this file -- keep the order unless confirmed.
 */
261 static struct nvc0_state_validate
262 validate_list_cp[] = {
263 { nvc0_compprog_validate, NVC0_NEW_CP_PROGRAM },
264 { nvc0_compute_validate_constbufs, NVC0_NEW_CP_CONSTBUF },
265 { nvc0_compute_validate_driverconst, NVC0_NEW_CP_DRIVERCONST },
266 { nvc0_compute_validate_buffers, NVC0_NEW_CP_BUFFERS },
267 { nvc0_compute_validate_textures, NVC0_NEW_CP_TEXTURES },
268 { nvc0_compute_validate_samplers, NVC0_NEW_CP_SAMPLERS },
269 { nvc0_compute_validate_globals, NVC0_NEW_CP_GLOBALS },
/*
 * Validate the compute state selected by `mask`: runs nvc0_state_validate()
 * over validate_list_cp against the context's compute dirty flags
 * (nvc0->dirty_cp) and stores its result in `ret`.
 *
 * NOTE(review): the return type, the last argument of the
 * nvc0_state_validate() call and the return statement are not visible in
 * this view.
 */
273 nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
277 ret = nvc0_state_validate(nvc0, mask, validate_list_cp,
278 ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,
/* If the context state reports a flush, fence the compute buffer context. */
281 if (unlikely(nvc0->state.flushed))
282 nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
/*
 * Upload the kernel input of the current compute program.
 *
 * A window of align(cp->parm_size, 0x100) located at NVC0_CB_USR_INFO(5)
 * inside the screen's uniform_bo is bound to constant buffer slot 0, then
 * cp->parm_size / 4 words read from `input` are written through CB_POS and
 * the constant buffers are flushed.
 *
 * NOTE(review): any guard on cp->parm_size and the position word that
 * follows the CB_POS header are not visible in this view.
 */
287 nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
289 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
290 struct nvc0_screen *screen = nvc0->screen;
291 struct nvc0_program *cp = nvc0->compprog;
294 struct nouveau_bo *bo = screen->uniform_bo;
295 const unsigned base = NVC0_CB_USR_INFO(5);
/* CB_SIZE takes three words: size, then the address as a high/low pair. */
297 BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
298 PUSH_DATA (push, align(cp->parm_size, 0x100));
299 PUSH_DATAh(push, bo->offset + base);
300 PUSH_DATA (push, bo->offset + base);
/* CB_BIND word: slot 0 in bits 8 and up, low bit set. */
301 BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
302 PUSH_DATA (push, (0 << 8) | 1);
303 /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
/* One word besides the parm_size / 4 data words is announced here. */
304 BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
306 PUSH_DATAp(push, input, cp->parm_size / 4);
308 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
309 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
/*
 * Launch the compute grid described by `info` on this context.
 *
 * Visible steps, in order: validate all compute state (mask ~0), upload the
 * kernel input, program the entry point, local and shared memory sizes,
 * barrier and GPR counts, flush, program block dimensions, program grid
 * dimensions (from info->grid, or through a macro fed from the indirect
 * buffer when info->indirect is set), issue the launch, and finally mark
 * the 3D constant buffers dirty.
 *
 * NOTE(review): lines are missing from this view (return type, declarations
 * of `ret` and `s`, braces, else arms, the payload of several BEGIN_NVC0
 * headers, and the end of the function) -- confirm against the complete
 * file.
 */
314 nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
316 struct nvc0_context *nvc0 = nvc0_context(pipe);
317 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
318 struct nvc0_program *cp = nvc0->compprog;
/* ret is the logical negation of the validation result, so it is non-zero
 * when nvc0_state_validate_cp() returns a false value. */
322 ret = !nvc0_state_validate_cp(nvc0, ~0);
324 NOUVEAU_ERR("Failed to launch grid !\n");
328 nvc0_compute_upload_input(nvc0, info->input);
/* Entry point: offset of the symbol selected by info->pc in the program. */
330 BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
331 PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
/* Local memory size is rounded up to a multiple of 0x10. */
333 BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
334 PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
336 PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
/* Three consecutive words: shared memory size rounded up to 0x100, the
 * product of the three block dimensions, and the barrier count. */
338 BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
339 PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
340 PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
341 PUSH_DATA (push, cp->num_barriers);
342 BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
343 PUSH_DATA (push, cp->num_gprs);
345 /* launch preliminary setup */
346 BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
347 PUSH_DATA (push, 0x1);
348 BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
350 BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
351 PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
/* Block dimensions: Y in the upper 16 bits and X in the lower bits of the
 * first word, Z in the second word. */
354 BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
355 PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
356 PUSH_DATA (push, info->block[2]);
/* Indirect launch: the grid dimensions are not pushed by the CPU here. */
358 if (unlikely(info->indirect)) {
359 struct nv04_resource *res = nv04_resource(info->indirect);
360 uint32_t offset = res->offset + info->indirect_offset;
361 unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;
/* Make room in the push buffer, reference the indirect buffer for reading,
 * emit the macro header for 3 data words, then append 3 * 4 bytes taken
 * from the indirect buffer at `offset` (no-prefetch entry) -- presumably
 * consumed as the macro's parameters; confirm. */
363 nouveau_pushbuf_space(push, 16, 0, 1);
364 PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
365 PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
366 nouveau_pushbuf_data(push, res->bo, offset,
367 NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
/* Grid dimensions from info->grid, packed like the block dimensions above.
 * NOTE(review): presumably the non-indirect arm; the else is not visible. */
370 BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
371 PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
372 PUSH_DATA (push, info->grid[2]);
374 /* kernel launching */
375 BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
377 BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
379 BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
380 PUSH_DATA (push, 0x1000);
381 BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
383 BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
384 PUSH_DATA (push, 0x1);
387 /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
388 nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
/* For stages 0..4, mark every valid constant buffer dirty again and reset
 * the tracked uniform window size. */
389 for (s = 0; s < 5; s++) {
390 nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
391 nvc0->state.uniform_buffer_bound[s] = 0;