2 * Copyright 2012 Francisco Jerez
3 * Copyright 2015 Samuel Pitoiset
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "nv50/nv50_context.h"
28 #include "nv50/nv50_compute.xml.h"
30 #include "codegen/nv50_ir_driver.h"
/* One-time compute engine setup for the screen: create the compute class
 * object on the channel and emit the static initial state — DMA objects,
 * stack/local/TIC/TSC base addresses and the 16 global buffer slots.
 * NOTE(review): this view of the function is missing several lines
 * (braces, case labels and some PUSH_DATA values); comments below only
 * describe what the visible code shows.
 */
33 nv50_screen_compute_setup(struct nv50_screen *screen,
34 struct nouveau_pushbuf *push)
36 struct nouveau_device *dev = screen->base.device;
37 struct nouveau_object *chan = screen->base.channel;
38 struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
/* Select the compute object class from the chipset: the chipsets listed
 * in the inner switch get NVA3_COMPUTE_CLASS, the others visible here
 * get NV50_COMPUTE_CLASS; unsupported chipsets error out. */
42 switch (dev->chipset & 0xf0) {
46 obj_class = NV50_COMPUTE_CLASS;
49 switch (dev->chipset) {
53 obj_class = NVA3_COMPUTE_CLASS;
56 obj_class = NV50_COMPUTE_CLASS;
61 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
/* Instantiate the compute object (handle 0xbeef50c0) on the channel. */
65 ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
/* Bind the new object to the compute subchannel. */
70 BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
71 PUSH_DATA (push, screen->compute->handle);
73 BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
/* Stack buffer: DMA object in VRAM, base address taken from stack_bo. */
75 BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
76 PUSH_DATA (push, fifo->vram);
77 BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
78 PUSH_DATAh(push, screen->stack_bo->offset);
79 PUSH_DATA (push, screen->stack_bo->offset);
80 BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
83 BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
85 BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
87 BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
88 PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
89 BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
90 PUSH_DATA (push, 0x100);
/* Global memory accesses go through a VRAM DMA object. */
91 BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
92 PUSH_DATA (push, fifo->vram);
/* Initialize global buffer slots 0..14 to linear mode; slot 15 is
 * programmed separately below with an unlimited (~0) limit. */
94 for (i = 0; i < 15; i++) {
95 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
98 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
100 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
101 PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
104 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
107 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
108 PUSH_DATA (push, ~0);
109 BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
110 PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
/* Warp allocation limits for local and stack memory (values on lines
 * not visible in this view). */
112 BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
114 BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
116 BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
118 BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
120 BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
/* Texture state: TIC (image control) table at txc + 0 and TSC (sampler)
 * table at txc + 64 KiB, both reached through VRAM DMA objects. */
123 BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
124 PUSH_DATA (push, fifo->vram);
125 BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
126 PUSH_DATA (push, 0x54);
127 BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
130 BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
131 PUSH_DATA (push, fifo->vram);
132 BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
133 PUSH_DATAh(push, screen->txc->offset);
134 PUSH_DATA (push, screen->txc->offset);
135 PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
137 BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
138 PUSH_DATA (push, fifo->vram);
139 BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
140 PUSH_DATAh(push, screen->txc->offset + 65536);
141 PUSH_DATA (push, screen->txc->offset + 65536);
142 PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
/* Code/constbuf DMA object, also in VRAM. */
144 BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
145 PUSH_DATA (push, fifo->vram);
/* Per-thread local memory: based at tls_bo + 64 KiB, size given as a
 * log2 derived from the screen's max TLS space. */
147 BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
148 PUSH_DATA (push, fifo->vram);
149 BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
150 PUSH_DATAh(push, screen->tls_bo->offset + 65536);
151 PUSH_DATA (push, screen->tls_bo->offset + 65536);
152 BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
153 PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
/* Ensure the bound compute program is translated and its code uploaded.
 * Presumably returns a boolean-like success value — callers negate it in
 * nv50_launch_grid; the return statements are on lines missing from this
 * view, so confirm against the full file. */
159 nv50_compute_validate_program(struct nv50_context *nv50)
161 struct nv50_program *prog = nv50->compprog;
/* Translate on first use for this device's chipset; failure leaves
 * prog->translated false. */
166 if (!prog->translated) {
167 prog->translated = nv50_program_translate(
168 prog, nv50->screen->base.device->chipset, &nv50->base.debug);
169 if (!prog->translated)
172 if (unlikely(!prog->code_size))
/* Upload the code; when nv50_program_upload_code() reports a (re)upload,
 * flush the on-chip code/constbuf cache so the engine sees the new code. */
175 if (likely(prog->code_size)) {
176 if (nv50_program_upload_code(nv50, prog)) {
177 struct nouveau_pushbuf *push = nv50->base.pushbuf;
178 BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
/* Re-add every resident global buffer to the compute bufctx so the
 * buffers are referenced read/write by subsequent compute pushbufs. */
187 nv50_compute_validate_globals(struct nv50_context *nv50)
/* Iterate the dynarray of pipe_resource pointers; element count is the
 * byte size divided by the pointer size. */
191 for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
193 struct pipe_resource *res = *util_dynarray_element(
194 &nv50->global_residents, struct pipe_resource *, i);
196 nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
197 nv04_resource(res), NOUVEAU_BO_RDWR);
/* Validate all compute state before a grid launch: program first, then
 * (when dirty) global buffers; finally fence the compute bufctx and
 * validate the pushbuf against it. */
202 nv50_compute_state_validate(struct nv50_context *nv50)
204 if (!nv50_compute_validate_program(nv50))
207 if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS)
208 nv50_compute_validate_globals(nv50);
210 /* TODO: validate textures, samplers, surfaces */
212 nv50_bufctx_fence(nv50->bufctx_cp, false);
214 nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp);
215 if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf)))
/* If validation caused a flush, fence the bufctx again with the
 * on-flush flag set. */
217 if (unlikely(nv50->state.flushed))
218 nv50_bufctx_fence(nv50->bufctx_cp, true);
/* Upload the kernel's input parameters: stage them in a temporary GART
 * scratch buffer, then copy them into the USER_PARAM method space with
 * an inline pushbuf data transfer. */
224 nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
226 struct nv50_screen *screen = nv50->screen;
227 struct nouveau_pushbuf *push = screen->base.pushbuf;
/* Round the program's parameter block up to a dword multiple. */
228 unsigned size = align(nv50->compprog->parm_size, 0x4);
/* The dword count is placed in bits 8.. of USER_PARAM_COUNT. */
230 BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
231 PUSH_DATA (push, (size / 4) << 8);
234 struct nouveau_mm_allocation *mm;
235 struct nouveau_bo *bo = NULL;
238 mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
/* NOTE(review): the nouveau_bo_map() return value is not checked before
 * the memcpy below dereferences bo->map. */
241 nouveau_bo_map(bo, 0, screen->base.client);
242 memcpy(bo->map + offset, input, size);
/* Reference the scratch bo (GART, read-only) so pushbuf validation
 * covers it before the inline data transfer. */
244 nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
245 nouveau_pushbuf_bufctx(push, nv50->bufctx);
246 nouveau_pushbuf_validate(push);
248 BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4);
249 nouveau_pushbuf_data(push, bo, offset, size);
/* Defer freeing the scratch allocation until the current fence signals;
 * drop our bo reference and bufctx bucket immediately. */
251 nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
252 nouveau_bo_ref(NULL, &bo);
253 nouveau_bufctx_reset(nv50->bufctx, 0);
/* Resolve a symbol label to its code address within the uploaded compute
 * program by linear search of the program's symbol table; falls back to
 * the program's base address when the label is not found. */
258 nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
260 struct nv50_program *prog = nv50->compprog;
261 const struct nv50_ir_prog_symbol *syms =
262 (const struct nv50_ir_prog_symbol *)prog->cp.syms;
265 for (i = 0; i < prog->cp.num_syms; ++i) {
266 if (syms[i].label == label)
267 return prog->code_base + syms[i].offset;
269 return prog->code_base; /* no symbols or symbol not found */
/* pipe_context::launch_grid implementation for nv50: validate compute
 * state, upload the kernel's input parameters, program the grid/block
 * dimensions and kick off the kernel. */
273 nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
275 struct nv50_context *nv50 = nv50_context(pipe);
276 struct nouveau_pushbuf *push = nv50->base.pushbuf;
/* Total threads per block across all three dimensions. */
277 unsigned block_size = info->block[0] * info->block[1] * info->block[2];
278 struct nv50_program *cp = nv50->compprog;
281 ret = !nv50_compute_state_validate(nv50);
283 NOUVEAU_ERR("Failed to launch grid !\n");
287 nv50_compute_upload_input(nv50, info->input);
/* Entry point of the kernel, resolved from info->pc via the program's
 * symbol table. */
289 BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
290 PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
/* Shared memory: kernel shared size plus the user-parameter area
 * (plus 0x10), rounded up to 64 bytes. */
292 BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
293 PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
294 BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
295 PUSH_DATA (push, cp->max_gpr);
297 /* grid/block setup */
298 BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
299 PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
300 PUSH_DATA (push, info->block[2]);
301 BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
302 PUSH_DATA (push, 1 << 16 | block_size);
303 BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
305 BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
306 PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
307 BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
310 /* kernel launching */
311 BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
/* Serialize afterwards so later work observes the launch. */
313 BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
316 /* binding a compute shader clobbers fragment shader state */
317 nv50->dirty_3d |= NV50_NEW_FRAGPROG;