1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
3 * Copyright (C) The Weather Channel, Inc. 2002.
4 * Copyright (C) 2004 Nicolai Haehnle.
7 * The Weather Channel (TM) funded Tungsten Graphics to develop the
8 * initial release of the Radeon 8500 driver under the XFree86 license.
9 * This notice must be preserved.
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
18 * The above copyright notice and this permission notice (including the next
19 * paragraph) shall be included in all copies or substantial portions of the
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28 * DEALINGS IN THE SOFTWARE.
31 * Nicolai Haehnle <prefect_@gmx.net>
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
40 #define R300_SIMULTANEOUS_CLIPRECTS 4
42 /* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
44 static const int r300_cliprect_cntl[4] = {
52 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
53 * buffer, starting with index n.
/*
 * r300_emit_cliprects - emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects
 * from the user command buffer, starting with box index n, into the ring.
 *
 * NOTE(review): this excerpt is missing several original lines (local
 * declarations of nr/i, error returns, ADVANCE_RING, closing braces);
 * the comments below describe only the code that is visible here.
 */
55 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
56 drm_radeon_kcmd_buffer_t *cmdbuf, int n)
58 struct drm_clip_rect box;
/* Clamp this batch to the hardware's simultaneous-cliprect limit. */
63 nr = cmdbuf->nbox - n;
64 if (nr > R300_SIMULTANEOUS_CLIPRECTS)
65 nr = R300_SIMULTANEOUS_CLIPRECTS;
67 DRM_DEBUG("%i cliprects\n", nr);
/* One TL/BR register pair per cliprect plus CNTL and scissor setup. */
70 BEGIN_RING(6 + nr * 2);
71 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
73 for (i = 0; i < nr; ++i) {
/* Boxes are presumably pre-validated by the DRM core, hence the
 * UNCHECKED copy — TODO confirm against the ioctl path. */
74 if (DRM_COPY_FROM_USER_UNCHECKED
75 (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
76 DRM_ERROR("copy cliprect faulted\n");
80 box.x2--; /* Hardware expects inclusive bottom-right corner */
/* RV515 and newer bias cliprect coordinates by R300_CLIPRECT_OFFSET
 * (the mask operands on the right-hand sides are truncated in this
 * excerpt). */
83 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
93 box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
95 box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
97 box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
99 box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
/* Pack top-left and bottom-right corners into the TL/BR registers. */
103 OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
104 (box.y1 << R300_CLIPRECT_Y_SHIFT));
105 OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
106 (box.y2 << R300_CLIPRECT_Y_SHIFT));
/* Select the cliprect-combine mode matching the number of rects. */
110 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
112 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
113 * client might be able to trample over memory.
114 * The impact should be very limited, but I'd rather be safe than
117 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
119 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
122 /* Why we allow zero cliprect rendering:
123 * There are some commands in a command buffer that must be submitted
124 * even when there are no cliprects, e.g. DMA buffer discard
125 * or state setting (though state setting could be avoided by
126 * simulating a loss of context).
128 * Now since the cmdbuf interface is so chaotic right now (and is
129 * bound to remain that way for a bit until things settle down),
130 * it is basically impossible to filter out the commands that are
131 * necessary and those that aren't.
133 * So I choose the safe way and don't do any filtering at all;
134 * instead, I simply set up the engine so that all rendering
135 * can't produce any fragments.
138 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
142 /* flush cache and wait idle clean after cliprect change */
144 OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
145 OUT_RING(R300_RB3D_DC_FLUSH);
148 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
149 OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
/* Record that a flush has just been emitted. */
152 dev_priv->track_flush |= RADEON_FLUSH_EMITED;
157 static u8 r300_reg_flags[0x10000 >> 2];
/*
 * r300_init_reg_flags - build the r300_reg_flags[] safety table.
 *
 * Every 32-bit register in the 0x0000-0xFFFF space gets one flag byte:
 * MARK_SAFE ranges may be written with arbitrary user values, while
 * MARK_CHECK_OFFSET / MARK_CHECK_SCISSOR entries need extra validation
 * before emission (see r300_emit_carefully_checked_packet0). Additional
 * ranges are enabled per chip family (RV410+, RV515+, and the legacy 2D
 * registers for <= RV280).
 */
159 void r300_init_reg_flags(struct drm_device *dev)
162 drm_radeon_private_t *dev_priv = dev->dev_private;
164 memset(r300_reg_flags, 0, 0x10000 >> 2);
/* NOTE(review): ADD_RANGE_MARK expands to a bare for-loop, not a
 * do { } while (0) — only safe when used as a full statement (never in
 * an unbraced if/else arm). */
165 #define ADD_RANGE_MARK(reg, count,mark) \
166 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
167 r300_reg_flags[i]|=(mark);
170 #define ADD_RANGE(reg, count) ADD_RANGE_MARK(reg, count, MARK_SAFE)
172 /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
173 ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
174 ADD_RANGE(R300_VAP_CNTL, 1);
175 ADD_RANGE(R300_SE_VTE_CNTL, 2);
176 ADD_RANGE(0x2134, 2);
177 ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
178 ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
179 ADD_RANGE(0x21DC, 1);
180 ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
181 ADD_RANGE(R300_VAP_CLIP_X_0, 4);
182 ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
183 ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
184 ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
185 ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
186 ADD_RANGE(R300_GB_ENABLE, 1);
187 ADD_RANGE(R300_GB_MSPOS0, 5);
188 ADD_RANGE(R300_TX_INVALTAGS, 1);
189 ADD_RANGE(R300_TX_ENABLE, 1);
190 ADD_RANGE(0x4200, 4);
191 ADD_RANGE(0x4214, 1);
192 ADD_RANGE(R300_RE_POINTSIZE, 1);
193 ADD_RANGE(0x4230, 3);
194 ADD_RANGE(R300_RE_LINE_CNT, 1);
195 ADD_RANGE(R300_RE_UNK4238, 1);
196 ADD_RANGE(0x4260, 3);
197 ADD_RANGE(R300_RE_SHADE, 4);
198 ADD_RANGE(R300_RE_POLYGON_MODE, 5);
199 ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
200 ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
201 ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
202 ADD_RANGE(R300_RE_CULL_CNTL, 1);
203 ADD_RANGE(0x42C0, 2);
204 ADD_RANGE(R300_RS_CNTL_0, 2);
206 ADD_RANGE(R300_SC_HYPERZ, 2);
207 ADD_RANGE(0x43E8, 1);
209 ADD_RANGE(0x46A4, 5);
211 ADD_RANGE(R300_RE_FOG_STATE, 1);
212 ADD_RANGE(R300_FOG_COLOR_R, 3);
213 ADD_RANGE(R300_PP_ALPHA_TEST, 2);
214 ADD_RANGE(0x4BD8, 1);
215 ADD_RANGE(R300_PFS_PARAM_0_X, 64);
216 ADD_RANGE(0x4E00, 1);
217 ADD_RANGE(R300_RB3D_CBLEND, 2);
218 ADD_RANGE(R300_RB3D_COLORMASK, 1);
219 ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
/* Color/depth buffer offsets point into VRAM/GART and must be range
 * checked, otherwise a client could write anywhere. */
220 ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET); /* check offset */
221 ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
222 ADD_RANGE(0x4E50, 9);
223 ADD_RANGE(0x4E88, 1);
224 ADD_RANGE(0x4EA0, 2);
225 ADD_RANGE(R300_ZB_CNTL, 3);
226 ADD_RANGE(R300_ZB_FORMAT, 4);
227 ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET); /* check offset */
228 ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
229 ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
230 ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
232 ADD_RANGE(R300_TX_FILTER_0, 16);
233 ADD_RANGE(R300_TX_FILTER1_0, 16);
234 ADD_RANGE(R300_TX_SIZE_0, 16);
235 ADD_RANGE(R300_TX_FORMAT_0, 16);
236 ADD_RANGE(R300_TX_PITCH_0, 16);
237 /* Texture offset is dangerous and needs more checking */
238 ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
239 ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
240 ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
242 /* Sporadic registers used as primitives are emitted */
243 ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
244 ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
245 ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
246 ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
248 ADD_RANGE(R500_SU_REG_DEST, 1);
/* Newer families expose extra registers as safe. */
249 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV410) {
250 ADD_RANGE(R300_DST_PIPE_CONFIG, 1);
253 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
254 ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
255 ADD_RANGE(R500_US_CONFIG, 2);
256 ADD_RANGE(R500_US_CODE_ADDR, 3);
257 ADD_RANGE(R500_US_FC_CTRL, 1);
258 ADD_RANGE(R500_RS_IP_0, 16);
259 ADD_RANGE(R500_RS_INST_0, 16);
260 ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
261 ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
262 ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
263 ADD_RANGE(R500_GA_US_VECTOR_INDEX, 2);
265 ADD_RANGE(R300_PFS_CNTL_0, 3);
266 ADD_RANGE(R300_PFS_NODE_0, 4);
267 ADD_RANGE(R300_PFS_TEXI_0, 64);
268 ADD_RANGE(R300_PFS_INSTR0_0, 64);
269 ADD_RANGE(R300_PFS_INSTR1_0, 64);
270 ADD_RANGE(R300_PFS_INSTR2_0, 64);
271 ADD_RANGE(R300_PFS_INSTR3_0, 64);
272 ADD_RANGE(R300_RS_INTERP_0, 8);
273 ADD_RANGE(R300_RS_ROUTE_0, 8);
277 /* add 2d blit engine registers for DDX */
278 ADD_RANGE(RADEON_SRC_Y_X, 3); /* 1434, 1438, 143c,
279 SRC_Y_X, DST_Y_X, DST_HEIGHT_WIDTH
281 ADD_RANGE(RADEON_DP_GUI_MASTER_CNTL, 1); /* 146c */
282 ADD_RANGE(RADEON_DP_BRUSH_BKGD_CLR, 2); /* 1478, 147c */
283 ADD_RANGE(RADEON_DP_SRC_FRGD_CLR, 2); /* 15d8, 15dc */
284 ADD_RANGE(RADEON_DP_CNTL, 1); /* 16c0 */
285 ADD_RANGE(RADEON_DP_WRITE_MASK, 1); /* 16cc */
286 ADD_RANGE(RADEON_DEFAULT_SC_BOTTOM_RIGHT, 1); /* 16e8 */
288 ADD_RANGE(RADEON_DSTCACHE_CTLSTAT, 1);
289 ADD_RANGE(RADEON_WAIT_UNTIL, 1);
/* 2D blit source/destination offsets also point into memory — check. */
291 ADD_RANGE_MARK(RADEON_DST_OFFSET, 1, MARK_CHECK_OFFSET);
292 ADD_RANGE_MARK(RADEON_SRC_OFFSET, 1, MARK_CHECK_OFFSET);
294 ADD_RANGE_MARK(RADEON_DST_PITCH_OFFSET, 1, MARK_CHECK_OFFSET);
295 ADD_RANGE_MARK(RADEON_SRC_PITCH_OFFSET, 1, MARK_CHECK_OFFSET);
298 ADD_RANGE_MARK(R300_SC_SCISSOR0, 2, MARK_CHECK_SCISSOR);
300 ADD_RANGE(R300_SC_CLIP_0_A, 2);
301 ADD_RANGE(R300_SC_CLIP_RULE, 1);
302 ADD_RANGE(R300_SC_SCREENDOOR, 1);
304 ADD_RANGE(R300_VAP_PVS_CODE_CNTL_0, 4);
305 ADD_RANGE(R300_VAP_PVS_VECTOR_INDX_REG, 2);
/* Legacy 2D registers only exist up to RV280. */
307 if (dev_priv->chip_family <= CHIP_RV280) {
308 ADD_RANGE(RADEON_RE_TOP_LEFT, 1);
309 ADD_RANGE(RADEON_RE_WIDTH_HEIGHT, 1);
310 ADD_RANGE(RADEON_AUX_SC_CNTL, 1);
311 ADD_RANGE(RADEON_RB3D_DSTCACHE_CTLSTAT, 1);
/*
 * r300_check_range - scan [reg, reg + count*4) in the flag table.
 * Returns nonzero when any register in the range is not MARK_SAFE,
 * i.e. the range needs the careful (checked) emission path.
 * NOTE(review): the declaration of i and the return statements are not
 * visible in this excerpt.
 */
315 int r300_check_range(unsigned reg, int count)
320 for (i = (reg >> 2); i < (reg >> 2) + count; i++)
321 if (r300_reg_flags[i] != MARK_SAFE)
/* r300_get_reg_flags - look up the safety flag byte for one register. */
326 int r300_get_reg_flags(unsigned reg)
330 return r300_reg_flags[(reg >> 2)];
/*
 * r300_emit_carefully_checked_packet0 - emit a packet0 whose register
 * range contains non-MARK_SAFE entries, validating each value first.
 * NOTE(review): parameter names, the values[] declaration (presumably
 * 64 entries — matching the sz cap below; TODO confirm) and the error
 * returns are missing from this excerpt.
 */
333 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
335 drm_radeon_kcmd_buffer_t
337 drm_r300_cmd_header_t
/* Reconstruct register address from the split hi/lo header fields. */
346 sz = header.packet0.count;
347 reg = (header.packet0.reghi << 8) | header.packet0.reglo;
349 if ((sz > 64) || (sz < 0)) {
351 ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
/* Copy values into a local staging buffer, validating each against its
 * flag byte before anything reaches the ring. */
355 for (i = 0; i < sz; i++) {
356 values[i] = ((int *)cmdbuf->buf)[i];
357 switch (r300_reg_flags[(reg >> 2) + i]) {
360 case MARK_CHECK_OFFSET:
/* Offset-bearing registers must point inside VRAM/GART. */
361 if (!radeon_check_offset(dev_priv, (u32) values[i])) {
363 ("Offset failed range check (reg=%04x sz=%d)\n",
369 DRM_ERROR("Register %04x failed check as flag=%02x\n",
370 reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
376 OUT_RING(CP_PACKET0(reg, sz - 1));
377 OUT_RING_TABLE(values, sz);
/* Consume the emitted dwords from the user buffer. */
380 cmdbuf->buf += sz * 4;
381 cmdbuf->bufsz -= sz * 4;
387 * Emits a packet0 setting arbitrary registers.
388 * Called by r300_do_cp_cmdbuf.
390 * Note that checks are performed on contents and addresses of the registers
/*
 * r300_emit_packet0 - emit a register-write packet0 from the user
 * command stream, dispatching to the careful checker when the target
 * range is not entirely MARK_SAFE.
 * NOTE(review): error returns and closing braces are missing from this
 * excerpt.
 */
392 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
393 drm_radeon_kcmd_buffer_t *cmdbuf,
394 drm_r300_cmd_header_t header)
400 sz = header.packet0.count;
401 reg = (header.packet0.reghi << 8) | header.packet0.reglo;
403 DRM_DEBUG("R300_CMD_PACKET0: reg %04x, sz %d\n", reg, sz);
/* The payload must actually fit in what remains of the user buffer. */
407 if (sz * 4 > cmdbuf->bufsz)
/* Reject writes that would run past the 64K register aperture. */
410 if (reg + sz * 4 >= 0x10000) {
411 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
416 if (r300_check_range(reg, sz)) {
417 /* go and check everything */
418 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
421 /* the rest of the data is safe to emit, whatever the values the user passed */
424 OUT_RING(CP_PACKET0(reg, sz - 1));
425 OUT_RING_TABLE((int *)cmdbuf->buf, sz);
428 cmdbuf->buf += sz * 4;
429 cmdbuf->bufsz -= sz * 4;
435 * Uploads user-supplied vertex program instructions or parameters onto
437 * Called by r300_do_cp_cmdbuf.
/*
 * r300_emit_vpu - upload sz vertex-program vectors (4 dwords / 16 bytes
 * each) to the VAP PVS memory at the given 16-bit address, bracketed by
 * cache flush and state-flush writes.
 * NOTE(review): error returns and ADVANCE_RING calls are missing from
 * this excerpt.
 */
439 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
440 drm_radeon_kcmd_buffer_t *cmdbuf,
441 drm_r300_cmd_header_t header)
447 sz = header.vpu.count;
448 addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
/* 16 bytes per vector must fit in the remaining user buffer. */
452 if (sz * 16 > cmdbuf->bufsz)
455 /* VAP is very sensitive so we purge cache before we program it
456 * and we also flush its state before & after */
458 OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
459 OUT_RING(R300_RB3D_DC_FLUSH);
460 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
461 OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
462 OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
466 dev_priv->track_flush |= RADEON_FLUSH_EMITED;
/* Stream the program through the indexed upload address/data pair. */
468 BEGIN_RING(3 + sz * 4);
469 OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
470 OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
471 OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
475 OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
479 cmdbuf->buf += sz * 16;
480 cmdbuf->bufsz -= sz * 16;
486 * Emit a clear packet from userspace.
487 * Called by r300_emit_packet3.
/*
 * r300_emit_clear - emit a userspace-supplied clear: a single-vertex
 * point draw (8 payload dwords) followed by a destination-cache flush.
 * NOTE(review): error returns and ADVANCE_RING calls are missing from
 * this excerpt.
 */
489 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
490 drm_radeon_kcmd_buffer_t *cmdbuf)
/* The clear payload is a fixed 8 dwords. */
494 if (8 * 4 > cmdbuf->bufsz)
498 OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
499 OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
500 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
501 OUT_RING_TABLE((int *)cmdbuf->buf, 8);
/* Flush and idle the 3D destination cache after the draw. */
505 OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
506 OUT_RING(R300_RB3D_DC_FLUSH);
507 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
508 OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
511 dev_priv->track_flush |= RADEON_FLUSH_EMITED;
513 cmdbuf->buf += 8 * 4;
514 cmdbuf->bufsz -= 8 * 4;
/*
 * r300_emit_3d_load_vbpntr - validate and emit a 3D_LOAD_VBPNTR packet.
 * Every vertex-array pointer in the payload is range-checked against
 * VRAM/GART before the packet reaches the ring.
 * NOTE(review): error returns, some loop-advance statements and the
 * OUT_RING of the header are missing from this excerpt.
 */
519 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
520 drm_radeon_kcmd_buffer_t *cmdbuf,
524 #define MAX_ARRAY_PACKET 64
525 u32 payload[MAX_ARRAY_PACKET];
/* Packet3 count field: number of payload dwords minus one. */
529 count = (header >> 16) & 0x3fff;
531 if ((count + 1) > MAX_ARRAY_PACKET) {
532 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
/* Stage the payload locally so it cannot change after validation
 * (guards against time-of-check/time-of-use races). */
536 memset(payload, 0, MAX_ARRAY_PACKET * 4);
537 memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
539 /* carefully check packet contents */
541 narrays = payload[0];
/* Arrays come in pairs: attribute dword then one or two offsets. */
544 while ((k < narrays) && (i < (count + 1))) {
545 i++; /* skip attribute field */
546 if (!radeon_check_offset(dev_priv, payload[i])) {
548 ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
556 /* have one more to process, they come in pairs */
557 if (!radeon_check_offset(dev_priv, payload[i])) {
559 ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
566 /* do the counts match what we expect ? */
567 if ((k != narrays) || (i != (count + 1))) {
569 ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
570 k, i, narrays, count + 1);
574 /* all clear, output packet */
576 BEGIN_RING(count + 2);
578 OUT_RING_TABLE(payload, count + 1);
/* Header dword plus count+1 payload dwords were consumed. */
581 cmdbuf->buf += (count + 2) * 4;
582 cmdbuf->bufsz -= (count + 2) * 4;
/*
 * r300_emit_bitblt_multi - validate and emit a CNTL_BITBLT_MULTI 2D blit
 * packet. When the GMC control word carries inline pitch/offset fields,
 * the one or two offsets are range-checked before emission.
 * NOTE(review): error returns and the ring-emission of the packet header
 * are missing from this excerpt.
 */
587 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
588 drm_radeon_kcmd_buffer_t *cmdbuf)
590 u32 *cmd = (u32 *) cmdbuf->buf;
594 count=(cmd[0]>>16) & 0x3fff;
596 if (cmd[0] & 0x8000) {
/* Either src or dst carries an inline offset: check the first one. */
599 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
600 | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
/* Offsets are stored in 1KB units — shift up before checking. */
601 offset = cmd[2] << 10;
602 ret = !radeon_check_offset(dev_priv, offset);
604 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
/* Both src and dst inline: a second offset follows in cmd[3]. */
609 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
610 (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
611 offset = cmd[3] << 10;
612 ret = !radeon_check_offset(dev_priv, offset);
614 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
623 OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
626 cmdbuf->buf += (count+2)*4;
627 cmdbuf->bufsz -= (count+2)*4;
/*
 * r300_emit_draw_indx_2 - validate and emit a 3D_DRAW_INDX_2 packet.
 * The inline index count must match the vertex count announced in the
 * VAP_VF_CNTL dword; when the indices live in a separate buffer, the
 * packet must be followed by a RADEON_CP_INDX_BUFFER packet whose
 * offset and size are also validated here.
 * NOTE(review): error returns, ADVANCE_RING calls and some buffer
 * advances are missing from this excerpt.
 */
632 static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
633 drm_radeon_kcmd_buffer_t *cmdbuf)
640 cmd = (u32 *) cmdbuf->buf;
641 count = (cmd[0]>>16) & 0x3fff;
642 expected_count = cmd[1] >> 16;
/* 16-bit indices pack two per dword; round the dword count up. */
643 if (!(cmd[1] & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
644 expected_count = (expected_count+1)/2;
646 if (count && count != expected_count) {
647 DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
648 count, expected_count);
654 OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
657 cmdbuf->buf += (count+2)*4;
658 cmdbuf->bufsz -= (count+2)*4;
/* Indices were not inline: an INDX_BUFFER packet must follow. */
661 drm_r300_cmd_header_t header;
663 if (cmdbuf->bufsz < 4*4 + sizeof(header)) {
664 DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
668 header.u = *(unsigned int *)cmdbuf->buf;
670 cmdbuf->buf += sizeof(header);
671 cmdbuf->bufsz -= sizeof(header);
672 cmdbuf = (u32 *) cmdbuf->buf;
674 if (header.header.cmd_type != R300_CMD_PACKET3 ||
675 header.packet3.packet != R300_CMD_PACKET3_RAW ||
676 cmd[0] != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
677 DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
/* cmd[1] must target the index-buffer register (0x810 encoded). */
681 if ((cmd[1] & 0x8000ffff) != 0x80000810) {
682 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
685 if (!radeon_check_offset(dev_priv, cmd[2])) {
686 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
689 if (cmd[3] != expected_count) {
690 DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
691 cmd[3], expected_count);
697 OUT_RING_TABLE((int *)(cmdbuf->buf + 4), 3);
701 cmdbuf->bufsz -= 4*4;
/*
 * r300_emit_raw_packet3 - emit a raw packet3 from the user stream after
 * whitelisting its opcode; BITBLT_MULTI, 3D_LOAD_VBPNTR and
 * 3D_DRAW_INDX_2 get dedicated validators, a few others are passed
 * through, everything else is rejected.
 * NOTE(review): error returns, `break` statements and ADVANCE_RING are
 * missing from this excerpt.
 */
707 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
708 drm_radeon_kcmd_buffer_t *cmdbuf)
/* Need at least the header dword before we can size the packet. */
714 if (4 > cmdbuf->bufsz)
717 /* Fixme !! This simply emits a packet without much checking.
718 We need to be smarter. */
720 /* obtain first word - actual packet3 header */
721 header = *(u32 *) cmdbuf->buf;
723 /* Is it packet 3 ? */
724 if ((header >> 30) != 0x3) {
725 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
729 count = (header >> 16) & 0x3fff;
731 /* Check again now that we know how much data to expect */
732 if ((count + 2) * 4 > cmdbuf->bufsz) {
734 ("Expected packet3 of length %d but have only %d bytes left\n",
735 (count + 2) * 4, cmdbuf->bufsz);
739 /* Is it a packet type we know about ? */
740 switch (header & 0xff00) {
741 case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */
742 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
744 case RADEON_CNTL_BITBLT_MULTI:
745 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
747 case RADEON_CP_INDX_BUFFER:
/* Standalone INDX_BUFFER is only valid behind a 3D_DRAW_INDX_2. */
748 DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
750 case RADEON_CP_3D_DRAW_IMMD_2:
751 /* triggers drawing using in-packet vertex data */
752 case RADEON_CP_3D_DRAW_VBUF_2:
753 /* triggers drawing of vertex buffers setup elsewhere */
/* Draw commands invalidate any previously-emitted flush/purge. */
754 dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
755 RADEON_PURGE_EMITED);
757 case RADEON_CP_3D_DRAW_INDX_2:
758 /* triggers drawing using indices to vertex buffer */
759 /* whenever we send vertex we clear flush & purge */
760 dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
761 RADEON_PURGE_EMITED);
762 return r300_emit_draw_indx_2(dev_priv, cmdbuf);
763 case RADEON_WAIT_FOR_IDLE:
765 /* these packets are safe */
768 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
/* Whitelisted: emit the packet verbatim. */
772 BEGIN_RING(count + 2);
774 OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
777 cmdbuf->buf += (count + 2) * 4;
778 cmdbuf->bufsz -= (count + 2) * 4;
784 * Emit a rendering packet3 from userspace.
785 * Called by r300_do_cp_cmdbuf.
/*
 * r300_emit_packet3 - emit a rendering packet3, replaying it once per
 * cliprect batch when more than R300_SIMULTANEOUS_CLIPRECTS boxes are
 * present (the buffer pointer is rewound for each replay).
 * NOTE(review): `break`/return statements and the closing brace are
 * missing from this excerpt.
 */
787 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
788 drm_radeon_kcmd_buffer_t *cmdbuf,
789 drm_r300_cmd_header_t header)
/* Remember the packet start so each cliprect pass re-reads it. */
793 char *orig_buf = cmdbuf->buf;
794 int orig_bufsz = cmdbuf->bufsz;
796 /* This is a do-while-loop so that we run the interior at least once,
797 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
/* Cliprects beyond the first batch are emitted here; the first batch
 * was already handled by r300_do_cp_cmdbuf. */
801 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
802 ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
806 cmdbuf->buf = orig_buf;
807 cmdbuf->bufsz = orig_bufsz;
810 switch (header.packet3.packet) {
811 case R300_CMD_PACKET3_CLEAR:
812 DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
813 ret = r300_emit_clear(dev_priv, cmdbuf);
815 DRM_ERROR("r300_emit_clear failed\n");
820 case R300_CMD_PACKET3_RAW:
821 DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
822 ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
824 DRM_ERROR("r300_emit_raw_packet3 failed\n");
830 DRM_ERROR("bad packet3 type %i at %p\n",
831 header.packet3.packet,
832 cmdbuf->buf - sizeof(header));
/* Advance to the next batch of cliprects. */
836 n += R300_SIMULTANEOUS_CLIPRECTS;
837 } while (n < cmdbuf->nbox);
842 /* Some of the R300 chips seem to be extremely touchy about the two registers
843 * that are configured in r300_pacify.
844 * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
845 * sends a command buffer that contains only state setting commands and a
846 * vertex program/parameter upload sequence, this will eventually lead to a
847 * lockup, unless the sequence is bracketed by calls to r300_pacify.
848 * So we should take great care to *always* call r300_pacify before
849 * *anything* 3D related, and again afterwards. This is what the
850 * call bracket in r300_do_cp_cmdbuf is for.
854 * Emit the sequence to pacify R300.
/*
 * r300_pacify - emit the flush/idle sequence that keeps touchy R300
 * chips from locking up; must bracket all 3D work (see the comment
 * above). Purge (FREE) bits are only added when no purge has been
 * emitted since the last draw.
 * NOTE(review): several OUT_RING payload lines and ADVANCE_RING are
 * missing from this excerpt.
 */
856 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
858 uint32_t cache_z, cache_3d, cache_2d;
861 cache_z = R300_ZC_FLUSH;
862 cache_2d = R300_DC_FLUSH_2D;
863 cache_3d = R300_DC_FLUSH_3D;
864 if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
865 /* we can purge, primitive where draw since last purge */
866 cache_z |= R300_ZC_FREE;
867 cache_2d |= R300_DC_FREE_2D;
868 cache_3d |= R300_DC_FREE_3D;
871 /* flush & purge zbuffer */
873 OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
876 /* flush & purge 3d */
878 OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
881 /* flush & purge texture */
883 OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
886 /* FIXME: is this one really needed ? */
888 OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
892 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
893 OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
895 /* flush & purge 2d through E2 as RB2D will trigger lockup */
897 OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
899 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
900 OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
901 RADEON_WAIT_HOST_IDLECLEAN);
903 /* set flush & purge flags */
904 dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
908 * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
909 * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
910 * be careful about how this function is called.
/*
 * r300_discard_buffer - mark a DMA buffer as pending and stamp it with
 * the next dispatch age; the age itself is emitted later by
 * r300_do_cp_cmdbuf (see the comment above).
 * NOTE(review): the lines setting buf->pending / freelist handling are
 * missing from this excerpt.
 */
912 static void r300_discard_buffer(struct drm_device * dev, struct drm_master *master, struct drm_buf * buf)
914 drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
915 struct drm_radeon_master_private *master_priv = master->driver_priv;
917 buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
/*
 * r300_cmd_wait - translate an R300_CMD_WAIT header into a
 * RADEON_WAIT_UNTIL ring write; a zero flags field is a no-op.
 * NOTE(review): `break`/`return` statements, some case labels and
 * ADVANCE_RING are missing from this excerpt.
 */
922 static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
923 drm_r300_cmd_header_t header)
928 if (!header.wait.flags)
933 switch(header.wait.flags) {
935 wait_until = RADEON_WAIT_2D_IDLE;
938 wait_until = RADEON_WAIT_3D_IDLE;
940 case R300_NEW_WAIT_2D_3D:
941 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE;
943 case R300_NEW_WAIT_2D_2D_CLEAN:
944 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
946 case R300_NEW_WAIT_3D_3D_CLEAN:
947 wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
949 case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
950 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
951 wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
958 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
959 OUT_RING(wait_until);
/*
 * r300_scratch - handle R300_CMD_SCRATCH: bump the chosen scratch-age
 * counter, publish it to a user-supplied array of (age, pending) pairs
 * for each listed buffer, and emit the new age to the scratch register.
 * NOTE(review): error returns, the h_pending decrement and ADVANCE_RING
 * are missing from this excerpt.
 */
963 static int r300_scratch(drm_radeon_private_t *dev_priv,
964 drm_radeon_kcmd_buffer_t *cmdbuf,
965 drm_r300_cmd_header_t header)
968 u32 i, buf_idx, h_pending;
/* Payload: a 64-bit user pointer followed by n_bufs buffer indices. */
971 if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) ) {
/* Only scratch registers 0-4 are available to this interface. */
975 if (header.scratch.reg >= 5) {
979 dev_priv->scratch_ages[header.scratch.reg] ++;
/* User-space base of the (age, pending) pair array. */
981 ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
983 cmdbuf->buf += sizeof(uint64_t);
984 cmdbuf->bufsz -= sizeof(uint64_t);
986 for (i=0; i < header.scratch.n_bufs; i++) {
987 buf_idx = *(u32 *)cmdbuf->buf;
988 buf_idx *= 2; /* 8 bytes per buf */
/* Write the new age back to user space for this buffer... */
990 if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
/* ...and read/update its pending counter. */
994 if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
998 if (h_pending == 0) {
1004 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
1008 cmdbuf->buf += sizeof(buf_idx);
1009 cmdbuf->bufsz -= sizeof(buf_idx);
/* Finally emit the new age to the hardware scratch register. */
1013 OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
1014 OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
1021 * Uploads user-supplied vertex program instructions or parameters onto
1022 * the graphics card.
1023 * Called by r300_do_cp_cmdbuf.
/*
 * r300_emit_r500fp - upload R500 fragment-program instructions (stride 6)
 * or constants (stride 4) through the GA_US_VECTOR_INDEX/DATA pair.
 * The type and clamp flags from the header are folded into the index
 * register's high bits.
 * NOTE(review): error returns and ADVANCE_RING are missing from this
 * excerpt.
 */
1025 static __inline__ int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
1026 drm_radeon_kcmd_buffer_t *cmdbuf,
1027 drm_r300_cmd_header_t header)
1036 sz = header.r500fp.count;
1037 /* address is 9 bits 0 - 8, bit 1 of flags is part of address */
1038 addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
1040 type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
1041 clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
/* Fold type/clamp selectors into the vector-index register value. */
1043 addr |= (type << 16);
1044 addr |= (clamp << 17);
/* Constants are 4 dwords per vector, instructions 6. */
1046 stride = type ? 4 : 6;
1048 DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
1051 if (sz * stride * 4 > cmdbuf->bufsz)
1054 BEGIN_RING(3 + sz * stride);
1055 OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
1056 OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
1057 OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride);
1061 cmdbuf->buf += sz * stride * 4;
1062 cmdbuf->bufsz -= sz * stride * 4;
1069 * Parses and validates a user-supplied command buffer and emits appropriate
1070 * commands on the DMA ring buffer.
1071 * Called by the ioctl handler function radeon_cp_cmdbuf.
1073 int r300_do_cp_cmdbuf(struct drm_device *dev,
1074 struct drm_file *file_priv,
1075 drm_radeon_kcmd_buffer_t *cmdbuf)
1077 drm_radeon_private_t *dev_priv = dev->dev_private;
1078 struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1079 struct drm_device_dma *dma = dev->dma;
1080 struct drm_buf *buf = NULL;
1081 int emit_dispatch_age = 0;
1087 r300_pacify(dev_priv);
1089 if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
1090 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
1095 while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
1097 drm_r300_cmd_header_t header;
1099 header.u = *(unsigned int *)cmdbuf->buf;
1101 cmdbuf->buf += sizeof(header);
1102 cmdbuf->bufsz -= sizeof(header);
1104 switch (header.header.cmd_type) {
1105 case R300_CMD_PACKET0:
1106 ret = r300_emit_packet0(dev_priv, cmdbuf, header);
1108 DRM_ERROR("r300_emit_packet0 failed\n");
1114 DRM_DEBUG("R300_CMD_VPU\n");
1115 ret = r300_emit_vpu(dev_priv, cmdbuf, header);
1117 DRM_ERROR("r300_emit_vpu failed\n");
1122 case R300_CMD_PACKET3:
1123 DRM_DEBUG("R300_CMD_PACKET3\n");
1124 ret = r300_emit_packet3(dev_priv, cmdbuf, header);
1126 DRM_ERROR("r300_emit_packet3 failed\n");
1131 case R300_CMD_END3D:
1132 DRM_DEBUG("R300_CMD_END3D\n");
1134 Ideally userspace driver should not need to issue this call,
1135 i.e. the drm driver should issue it automatically and prevent
1138 In practice, we do not understand why this call is needed and what
1139 it does (except for some vague guesses that it has to do with cache
1140 coherence) and so the user space driver does it.
1142 Once we are sure which uses prevent lockups the code could be moved
1143 into the kernel and the userspace driver will not
1144 need to use this command.
1146 Note that issuing this command does not hurt anything
1147 except, possibly, performance */
1148 r300_pacify(dev_priv);
1151 case R300_CMD_CP_DELAY:
1152 /* simple enough, we can do it here */
1153 DRM_DEBUG("R300_CMD_CP_DELAY\n");
1158 BEGIN_RING(header.delay.count);
1159 for (i = 0; i < header.delay.count; i++)
1160 OUT_RING(RADEON_CP_PACKET2);
1165 case R300_CMD_DMA_DISCARD:
1166 DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
1167 idx = header.dma.buf_idx;
1168 if (idx < 0 || idx >= dma->buf_count) {
1169 DRM_ERROR("buffer index %d (of %d max)\n",
1170 idx, dma->buf_count - 1);
1175 buf = dma->buflist[idx];
1176 if (buf->file_priv != file_priv || buf->pending) {
1177 DRM_ERROR("bad buffer %p %p %d\n",
1178 buf->file_priv, file_priv,
1184 emit_dispatch_age = 1;
1185 r300_discard_buffer(dev, file_priv->master, buf);
1189 DRM_DEBUG("R300_CMD_WAIT\n");
1190 r300_cmd_wait(dev_priv, header);
1193 case R300_CMD_SCRATCH:
1194 DRM_DEBUG("R300_CMD_SCRATCH\n");
1195 ret = r300_scratch(dev_priv, cmdbuf, header);
1197 DRM_ERROR("r300_scratch failed\n");
1202 case R300_CMD_R500FP:
1203 if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
1204 DRM_ERROR("Calling r500 command on r300 card\n");
1208 DRM_DEBUG("R300_CMD_R500FP\n");
1209 ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
1211 DRM_ERROR("r300_emit_r500fp failed\n");
1216 DRM_ERROR("bad cmd_type %i at %p\n",
1217 header.header.cmd_type,
1218 cmdbuf->buf - sizeof(header));
1227 r300_pacify(dev_priv);
1229 /* We emit the vertex buffer age here, outside the pacifier "brackets"
1231 * (1) This may coalesce multiple age emissions into a single one and
1232 * (2) more importantly, some chips lock up hard when scratch registers
1233 * are written inside the pacifier bracket.
1235 if (emit_dispatch_age) {
1238 /* Emit the vertex buffer age */
1240 RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch);