/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
 *
 * Copyright (C) The Weather Channel, Inc.  2002.
 * Copyright (C) 2004 Nicolai Haehnle.
 * All Rights Reserved.
 *
 * The Weather Channel (TM) funded Tungsten Graphics to develop the
 * initial release of the Radeon 8500 driver under the XFree86 license.
 * This notice must be preserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Nicolai Haehnle <prefect_@gmx.net>
 */

#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon_drv.h"
#include "r300_reg.h"

#define R300_SIMULTANEOUS_CLIPRECTS             4

/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
 */
static const int r300_cliprect_cntl[4] = {
        0xAAAA,
        0xEEEE,
        0xFEFE,
        0xFFFE
};

/**
 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
 * buffer, starting with index n.
 */
static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
                               drm_radeon_kcmd_buffer_t *cmdbuf, int n)
{
        struct drm_clip_rect box;
        int nr;
        int i;
        RING_LOCALS;

        nr = cmdbuf->nbox - n;
        if (nr > R300_SIMULTANEOUS_CLIPRECTS)
                nr = R300_SIMULTANEOUS_CLIPRECTS;

        DRM_DEBUG("%i cliprects\n", nr);

        if (nr) {
                BEGIN_RING(6 + nr * 2);
                OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));

                for (i = 0; i < nr; ++i) {
                        if (DRM_COPY_FROM_USER_UNCHECKED
                            (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
                                DRM_ERROR("copy cliprect faulted\n");
                                return -EFAULT;
                        }

                        box.x2--; /* Hardware expects inclusive bottom-right corner */
                        box.y2--;

                        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
                                box.x1 = (box.x1) &
                                        R300_CLIPRECT_MASK;
                                box.y1 = (box.y1) &
                                        R300_CLIPRECT_MASK;
                                box.x2 = (box.x2) &
                                        R300_CLIPRECT_MASK;
                                box.y2 = (box.y2) &
                                        R300_CLIPRECT_MASK;
                        } else {
                                box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
                                        R300_CLIPRECT_MASK;
                                box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
                                        R300_CLIPRECT_MASK;
                                box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
                                        R300_CLIPRECT_MASK;
                                box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
                                        R300_CLIPRECT_MASK;
                        }

                        OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
                                 (box.y1 << R300_CLIPRECT_Y_SHIFT));
                        OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
                                 (box.y2 << R300_CLIPRECT_Y_SHIFT));

                }

                OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);

                /* TODO/SECURITY: Force scissors to a safe value, otherwise the
                 * client might be able to trample over memory.
                 * The impact should be very limited, but I'd rather be safe than
                 * sorry.
                 */
                OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
                OUT_RING(0);
                OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
                ADVANCE_RING();
        } else {
                /* Why we allow zero cliprect rendering:
                 * There are some commands in a command buffer that must be submitted
                 * even when there are no cliprects, e.g. DMA buffer discard
                 * or state setting (though state setting could be avoided by
                 * simulating a loss of context).
                 *
                 * Now since the cmdbuf interface is so chaotic right now (and is
                 * bound to remain that way for a bit until things settle down),
                 * it is basically impossible to filter out the commands that are
                 * necessary and those that aren't.
                 *
                 * So I choose the safe way and don't do any filtering at all;
                 * instead, I simply set up the engine so that all rendering
                 * can't produce any fragments.
                 */
                BEGIN_RING(2);
                OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
                ADVANCE_RING();
        }

        /* flush cache and wait idle clean after cliprect change */
        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        OUT_RING(R300_RB3D_DC_FLUSH);
        ADVANCE_RING();
        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
        OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
        ADVANCE_RING();
        /* set flush flag */
        dev_priv->track_flush |= RADEON_FLUSH_EMITED;

        return 0;
}

static u8 r300_reg_flags[0x10000 >> 2];

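/**
 * Initialize the table of per-register safety flags that is used to
 * validate register writes in userspace command buffers.
 */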
void r300_init_reg_flags(struct drm_device *dev)
{
        int i;
        drm_radeon_private_t *dev_priv = dev->dev_private;

        memset(r300_reg_flags, 0, 0x10000 >> 2);
#define ADD_RANGE_MARK(reg, count, mark) \
                for (i = ((reg) >> 2); i < ((reg) >> 2) + (count); i++) \
                        r300_reg_flags[i] |= (mark);


#define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)

        /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
        ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
        ADD_RANGE(R300_VAP_CNTL, 1);
        ADD_RANGE(R300_SE_VTE_CNTL, 2);
        ADD_RANGE(0x2134, 2);
        ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
        ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
        ADD_RANGE(0x21DC, 1);
        ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
        ADD_RANGE(R300_VAP_CLIP_X_0, 4);
        ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
        ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
        ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
        ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
        ADD_RANGE(R300_GB_ENABLE, 1);
        ADD_RANGE(R300_GB_MSPOS0, 5);
        ADD_RANGE(R300_TX_INVALTAGS, 1);
        ADD_RANGE(R300_TX_ENABLE, 1);
        ADD_RANGE(0x4200, 4);
        ADD_RANGE(0x4214, 1);
        ADD_RANGE(R300_RE_POINTSIZE, 1);
        ADD_RANGE(0x4230, 3);
        ADD_RANGE(R300_RE_LINE_CNT, 1);
        ADD_RANGE(R300_RE_UNK4238, 1);
        ADD_RANGE(0x4260, 3);
        ADD_RANGE(R300_RE_SHADE, 4);
        ADD_RANGE(R300_RE_POLYGON_MODE, 5);
        ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
        ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
        ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
        ADD_RANGE(R300_RE_CULL_CNTL, 1);
        ADD_RANGE(0x42C0, 2);
        ADD_RANGE(R300_RS_CNTL_0, 2);

        ADD_RANGE(R300_SC_HYPERZ, 2);
        ADD_RANGE(0x43E8, 1);

        ADD_RANGE(0x46A4, 5);

        ADD_RANGE(R300_RE_FOG_STATE, 1);
        ADD_RANGE(R300_FOG_COLOR_R, 3);
        ADD_RANGE(R300_PP_ALPHA_TEST, 2);
        ADD_RANGE(0x4BD8, 1);
        ADD_RANGE(R300_PFS_PARAM_0_X, 64);
        ADD_RANGE(0x4E00, 1);
        ADD_RANGE(R300_RB3D_CBLEND, 2);
        ADD_RANGE(R300_RB3D_COLORMASK, 1);
        ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
        ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
        ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
        ADD_RANGE(0x4E50, 9);
        ADD_RANGE(0x4E88, 1);
        ADD_RANGE(0x4EA0, 2);
        ADD_RANGE(R300_ZB_CNTL, 3);
        ADD_RANGE(R300_ZB_FORMAT, 4);
        ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);      /* check offset */
        ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
        ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
        ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);

        ADD_RANGE(R300_TX_FILTER_0, 16);
        ADD_RANGE(R300_TX_FILTER1_0, 16);
        ADD_RANGE(R300_TX_SIZE_0, 16);
        ADD_RANGE(R300_TX_FORMAT_0, 16);
        ADD_RANGE(R300_TX_PITCH_0, 16);
        /* Texture offset is dangerous and needs more checking */
        ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
        ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
        ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);

        /* Registers used sporadically while primitives are emitted */
        ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
        ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
        ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
        ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);

        ADD_RANGE(R500_SU_REG_DEST, 1);
        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV410) {
                ADD_RANGE(R300_DST_PIPE_CONFIG, 1);
        }

        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
                ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
                ADD_RANGE(R500_US_CONFIG, 2);
                ADD_RANGE(R500_US_CODE_ADDR, 3);
                ADD_RANGE(R500_US_FC_CTRL, 1);
                ADD_RANGE(R500_RS_IP_0, 16);
                ADD_RANGE(R500_RS_INST_0, 16);
                ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
                ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
                ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
                ADD_RANGE(R500_GA_US_VECTOR_INDEX, 2);
        } else {
                ADD_RANGE(R300_PFS_CNTL_0, 3);
                ADD_RANGE(R300_PFS_NODE_0, 4);
                ADD_RANGE(R300_PFS_TEXI_0, 64);
                ADD_RANGE(R300_PFS_INSTR0_0, 64);
                ADD_RANGE(R300_PFS_INSTR1_0, 64);
                ADD_RANGE(R300_PFS_INSTR2_0, 64);
                ADD_RANGE(R300_PFS_INSTR3_0, 64);
                ADD_RANGE(R300_RS_INTERP_0, 8);
                ADD_RANGE(R300_RS_ROUTE_0, 8);

        }

        /* add 2d blit engine registers for DDX */
        ADD_RANGE(RADEON_SRC_Y_X, 3); /* 1434, 1438, 143c,
                                         SRC_Y_X, DST_Y_X, DST_HEIGHT_WIDTH
                                       */
        ADD_RANGE(RADEON_DP_GUI_MASTER_CNTL, 1); /* 146c */
        ADD_RANGE(RADEON_DP_BRUSH_BKGD_CLR, 2); /* 1478, 147c */
        ADD_RANGE(RADEON_DP_SRC_FRGD_CLR, 2); /* 15d8, 15dc */
        ADD_RANGE(RADEON_DP_CNTL, 1); /* 16c0 */
        ADD_RANGE(RADEON_DP_WRITE_MASK, 1); /* 16cc */
        ADD_RANGE(RADEON_DEFAULT_SC_BOTTOM_RIGHT, 1); /* 16e8 */

        ADD_RANGE(RADEON_DSTCACHE_CTLSTAT, 1);
        ADD_RANGE(RADEON_WAIT_UNTIL, 1);

        ADD_RANGE_MARK(RADEON_DST_OFFSET, 1, MARK_CHECK_OFFSET);
        ADD_RANGE_MARK(RADEON_SRC_OFFSET, 1, MARK_CHECK_OFFSET);

        ADD_RANGE_MARK(RADEON_DST_PITCH_OFFSET, 1, MARK_CHECK_OFFSET);
        ADD_RANGE_MARK(RADEON_SRC_PITCH_OFFSET, 1, MARK_CHECK_OFFSET);

        /* TODO SCISSOR */
        ADD_RANGE_MARK(R300_SC_SCISSOR0, 2, MARK_CHECK_SCISSOR);

        ADD_RANGE(R300_SC_CLIP_0_A, 2);
        ADD_RANGE(R300_SC_CLIP_RULE, 1);
        ADD_RANGE(R300_SC_SCREENDOOR, 1);

        ADD_RANGE(R300_VAP_PVS_CODE_CNTL_0, 4);
        ADD_RANGE(R300_VAP_PVS_VECTOR_INDX_REG, 2);

        if (dev_priv->chip_family <= CHIP_RV280) {
                ADD_RANGE(RADEON_RE_TOP_LEFT, 1);
                ADD_RANGE(RADEON_RE_WIDTH_HEIGHT, 1);
                ADD_RANGE(RADEON_AUX_SC_CNTL, 1);
                ADD_RANGE(RADEON_RB3D_DSTCACHE_CTLSTAT, 1);
        }
}

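/**
 * Check whether all registers in the range [reg, reg + count*4) are marked
 * MARK_SAFE.  Returns 0 if so, 1 if any register needs further checking,
 * and -1 if the range lies outside the register space.
 */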
int r300_check_range(unsigned reg, int count)
{
        int i;
        if (reg & ~0xffff)
                return -1;
        for (i = (reg >> 2); i < (reg >> 2) + count; i++)
                if (r300_reg_flags[i] != MARK_SAFE)
                        return 1;
        return 0;
}

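/**
 * Return the safety flags for a single register, or -1 if the register
 * lies outside the register space.
 */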
int r300_get_reg_flags(unsigned reg)
{
        if (reg & ~0xffff)
                return -1;
        return r300_reg_flags[(reg >> 2)];
}

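/**
 * Emit a packet0 after validating every register write against the flags
 * set up by r300_init_reg_flags (e.g. offsets are range checked).
 * Called by r300_emit_packet0 when r300_check_range flags the range.
 */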
static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
                                                          dev_priv,
                                                          drm_radeon_kcmd_buffer_t
                                                          * cmdbuf,
                                                          drm_r300_cmd_header_t
                                                          header)
{
        int reg;
        int sz;
        int i;
        int values[64];
        RING_LOCALS;

        sz = header.packet0.count;
        reg = (header.packet0.reghi << 8) | header.packet0.reglo;

        if ((sz > 64) || (sz < 0)) {
                DRM_ERROR
                    ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
                     reg, sz);
                return -EINVAL;
        }
        for (i = 0; i < sz; i++) {
                values[i] = ((int *)cmdbuf->buf)[i];
                switch (r300_reg_flags[(reg >> 2) + i]) {
                case MARK_SAFE:
                        break;
                case MARK_CHECK_OFFSET:
                        if (!radeon_check_offset(dev_priv, (u32) values[i])) {
                                DRM_ERROR
                                    ("Offset failed range check (reg=%04x sz=%d)\n",
                                     reg, sz);
                                return -EINVAL;
                        }
                        break;
                default:
                        DRM_ERROR("Register %04x failed check as flag=%02x\n",
                                  reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
                        return -EINVAL;
                }
        }

        BEGIN_RING(1 + sz);
        OUT_RING(CP_PACKET0(reg, sz - 1));
        OUT_RING_TABLE(values, sz);
        ADVANCE_RING();

        cmdbuf->buf += sz * 4;
        cmdbuf->bufsz -= sz * 4;

        return 0;
}

/**
 * Emits a packet0 setting arbitrary registers.
 * Called by r300_do_cp_cmdbuf.
 *
 * Note that checks are performed on contents and addresses of the registers
 */
static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
                                        drm_radeon_kcmd_buffer_t *cmdbuf,
                                        drm_r300_cmd_header_t header)
{
        int reg;
        int sz;
        RING_LOCALS;

        sz = header.packet0.count;
        reg = (header.packet0.reghi << 8) | header.packet0.reglo;

        DRM_DEBUG("R300_CMD_PACKET0: reg %04x, sz %d\n", reg, sz);
        if (!sz)
                return 0;

        if (sz * 4 > cmdbuf->bufsz)
                return -EINVAL;

        if (reg + sz * 4 >= 0x10000) {
                DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
                          sz);
                return -EINVAL;
        }

        if (r300_check_range(reg, sz)) {
                /* go and check everything */
                return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
                                                           header);
        }
        /* the rest of the data is safe to emit, whatever the values the user passed */

        BEGIN_RING(1 + sz);
        OUT_RING(CP_PACKET0(reg, sz - 1));
        OUT_RING_TABLE((int *)cmdbuf->buf, sz);
        ADVANCE_RING();

        cmdbuf->buf += sz * 4;
        cmdbuf->bufsz -= sz * 4;

        return 0;
}

/**
 * Uploads user-supplied vertex program instructions or parameters onto
 * the graphics card.
 * Called by r300_do_cp_cmdbuf.
 */
static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
                                    drm_radeon_kcmd_buffer_t *cmdbuf,
                                    drm_r300_cmd_header_t header)
{
        int sz;
        int addr;
        RING_LOCALS;

        sz = header.vpu.count;
        addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;

        if (!sz)
                return 0;
        if (sz * 16 > cmdbuf->bufsz)
                return -EINVAL;

        /* VAP is very sensitive so we purge cache before we program it
         * and we also flush its state before & after */
        BEGIN_RING(6);
        OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        OUT_RING(R300_RB3D_DC_FLUSH);
        OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
        OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
        OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
        OUT_RING(0);
        ADVANCE_RING();
        /* set flush flag */
        dev_priv->track_flush |= RADEON_FLUSH_EMITED;

        BEGIN_RING(3 + sz * 4);
        OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
        OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
        OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
        ADVANCE_RING();

        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
        OUT_RING(0);
        ADVANCE_RING();

        cmdbuf->buf += sz * 16;
        cmdbuf->bufsz -= sz * 16;

        return 0;
}

/**
 * Emit a clear packet from userspace.
 * Called by r300_emit_packet3.
 */
static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
                                      drm_radeon_kcmd_buffer_t *cmdbuf)
{
        RING_LOCALS;

        if (8 * 4 > cmdbuf->bufsz)
                return -EINVAL;

        BEGIN_RING(10);
        OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
        OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
                 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
        OUT_RING_TABLE((int *)cmdbuf->buf, 8);
        ADVANCE_RING();

        BEGIN_RING(4);
        OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        OUT_RING(R300_RB3D_DC_FLUSH);
        OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
        OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
        ADVANCE_RING();
        /* set flush flag */
        dev_priv->track_flush |= RADEON_FLUSH_EMITED;

        cmdbuf->buf += 8 * 4;
        cmdbuf->bufsz -= 8 * 4;

        return 0;
}

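/**
 * Emit a 3D_LOAD_VBPNTR packet after range checking every vertex array
 * offset it contains.
 * Called by r300_emit_raw_packet3.
 */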
static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
                                               drm_radeon_kcmd_buffer_t *cmdbuf,
                                               u32 header)
{
        int count, i, k;
#define MAX_ARRAY_PACKET  64
        u32 payload[MAX_ARRAY_PACKET];
        u32 narrays;
        RING_LOCALS;

        count = (header >> 16) & 0x3fff;

        if ((count + 1) > MAX_ARRAY_PACKET) {
                DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
                          count);
                return -EINVAL;
        }
        memset(payload, 0, MAX_ARRAY_PACKET * 4);
        memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);

        /* carefully check packet contents */

        narrays = payload[0];
        k = 0;
        i = 1;
        while ((k < narrays) && (i < (count + 1))) {
                i++;            /* skip attribute field */
                if (!radeon_check_offset(dev_priv, payload[i])) {
                        DRM_ERROR
                            ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
                             k, i);
                        return -EINVAL;
                }
                k++;
                i++;
                if (k == narrays)
                        break;
                /* have one more to process, they come in pairs */
                if (!radeon_check_offset(dev_priv, payload[i])) {
                        DRM_ERROR
                            ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
                             k, i);
                        return -EINVAL;
                }
                k++;
                i++;
        }
        /* do the counts match what we expect ? */
        if ((k != narrays) || (i != (count + 1))) {
                DRM_ERROR
                    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
                     k, i, narrays, count + 1);
                return -EINVAL;
        }

        /* all clear, output packet */

        BEGIN_RING(count + 2);
        OUT_RING(header);
        OUT_RING_TABLE(payload, count + 1);
        ADVANCE_RING();

        cmdbuf->buf += (count + 2) * 4;
        cmdbuf->bufsz -= (count + 2) * 4;

        return 0;
}

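/**
 * Emit a CNTL_BITBLT_MULTI packet after range checking any source and
 * destination offsets it carries.
 * Called by r300_emit_raw_packet3.
 */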
static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
                                             drm_radeon_kcmd_buffer_t *cmdbuf)
{
        u32 *cmd = (u32 *) cmdbuf->buf;
        int count, ret;
        RING_LOCALS;

        count = (cmd[0] >> 16) & 0x3fff;

        if (cmd[0] & 0x8000) {
                u32 offset;

                if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
                              | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
                        offset = cmd[2] << 10;
                        ret = !radeon_check_offset(dev_priv, offset);
                        if (ret) {
                                DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
                                return -EINVAL;
                        }
                }

                if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
                    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
                        offset = cmd[3] << 10;
                        ret = !radeon_check_offset(dev_priv, offset);
                        if (ret) {
                                DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
                                return -EINVAL;
                        }

                }
        }

        BEGIN_RING(count + 2);
        OUT_RING(cmd[0]);
        OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
        ADVANCE_RING();

        cmdbuf->buf += (count + 2) * 4;
        cmdbuf->bufsz -= (count + 2) * 4;

        return 0;
}

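/**
 * Emit a 3D_DRAW_INDX_2 packet, validating the vertex count and, when the
 * indices are not embedded in the packet, the INDX_BUFFER packet that must
 * immediately follow it.
 * Called by r300_emit_raw_packet3.
 */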
static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
                                            drm_radeon_kcmd_buffer_t *cmdbuf)
{
        u32 *cmd;
        int count;
        int expected_count;
        RING_LOCALS;

        cmd = (u32 *) cmdbuf->buf;
        count = (cmd[0] >> 16) & 0x3fff;
        expected_count = cmd[1] >> 16;
        if (!(cmd[1] & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
                expected_count = (expected_count + 1) / 2;

        if (count && count != expected_count) {
                DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
                        count, expected_count);
                return -EINVAL;
        }

        BEGIN_RING(count + 2);
        OUT_RING(cmd[0]);
        OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
        ADVANCE_RING();

        cmdbuf->buf += (count + 2) * 4;
        cmdbuf->bufsz -= (count + 2) * 4;

        if (!count) {
                drm_r300_cmd_header_t header;

                if (cmdbuf->bufsz < 4 * 4 + sizeof(header)) {
                        DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
                        return -EINVAL;
                }

                header.u = *(unsigned int *)cmdbuf->buf;

                cmdbuf->buf += sizeof(header);
                cmdbuf->bufsz -= sizeof(header);
                cmd = (u32 *) cmdbuf->buf;

                if (header.header.cmd_type != R300_CMD_PACKET3 ||
                    header.packet3.packet != R300_CMD_PACKET3_RAW ||
                    cmd[0] != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
                        DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
                        return -EINVAL;
                }

                if ((cmd[1] & 0x8000ffff) != 0x80000810) {
                        DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
                        return -EINVAL;
                }
                if (!radeon_check_offset(dev_priv, cmd[2])) {
                        DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
                        return -EINVAL;
                }
                if (cmd[3] != expected_count) {
                        DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
                                cmd[3], expected_count);
                        return -EINVAL;
                }

                BEGIN_RING(4);
                OUT_RING(cmd[0]);
                OUT_RING_TABLE((int *)(cmdbuf->buf + 4), 3);
                ADVANCE_RING();

                cmdbuf->buf += 4 * 4;
                cmdbuf->bufsz -= 4 * 4;
        }

        return 0;
}

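/**
 * Emit a raw packet3 from the command buffer, dispatching packet types that
 * need extra validation to their dedicated handlers.
 * Called by r300_emit_packet3.
 */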
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
                                            drm_radeon_kcmd_buffer_t *cmdbuf)
{
        u32 header;
        int count;
        RING_LOCALS;

        if (4 > cmdbuf->bufsz)
                return -EINVAL;

        /* Fixme !! This simply emits a packet without much checking.
           We need to be smarter. */

        /* obtain first word - actual packet3 header */
        header = *(u32 *) cmdbuf->buf;

        /* Is it packet 3 ? */
        if ((header >> 30) != 0x3) {
                DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
                return -EINVAL;
        }

        count = (header >> 16) & 0x3fff;

        /* Check again now that we know how much data to expect */
        if ((count + 2) * 4 > cmdbuf->bufsz) {
                DRM_ERROR
                    ("Expected packet3 of length %d but have only %d bytes left\n",
                     (count + 2) * 4, cmdbuf->bufsz);
                return -EINVAL;
        }

        /* Is it a packet type we know about ? */
        switch (header & 0xff00) {
        case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
                return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);

        case RADEON_CNTL_BITBLT_MULTI:
                return r300_emit_bitblt_multi(dev_priv, cmdbuf);

        case RADEON_CP_INDX_BUFFER:
                DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
                return -EINVAL;
        case RADEON_CP_3D_DRAW_IMMD_2:
                /* triggers drawing using in-packet vertex data */
        case RADEON_CP_3D_DRAW_VBUF_2:
                /* triggers drawing of vertex buffers setup elsewhere */
                dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
                                           RADEON_PURGE_EMITED);
                break;
        case RADEON_CP_3D_DRAW_INDX_2:
                /* triggers drawing using indices to vertex buffer */
                /* whenever we send vertex we clear flush & purge */
                dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
                                           RADEON_PURGE_EMITED);
                return r300_emit_draw_indx_2(dev_priv, cmdbuf);
        case RADEON_WAIT_FOR_IDLE:
        case RADEON_CP_NOP:
                /* these packets are safe */
                break;
        default:
                DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
                return -EINVAL;
        }

        BEGIN_RING(count + 2);
        OUT_RING(header);
        OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
        ADVANCE_RING();

        cmdbuf->buf += (count + 2) * 4;
        cmdbuf->bufsz -= (count + 2) * 4;

        return 0;
}

/**
 * Emit a rendering packet3 from userspace.
 * Called by r300_do_cp_cmdbuf.
 */
static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
                                        drm_radeon_kcmd_buffer_t *cmdbuf,
                                        drm_r300_cmd_header_t header)
{
        int n;
        int ret;
        char *orig_buf = cmdbuf->buf;
        int orig_bufsz = cmdbuf->bufsz;

        /* This is a do-while-loop so that we run the interior at least once,
         * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
         */
        n = 0;
        do {
                if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
                        ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
                        if (ret)
                                return ret;

                        cmdbuf->buf = orig_buf;
                        cmdbuf->bufsz = orig_bufsz;
                }

                switch (header.packet3.packet) {
                case R300_CMD_PACKET3_CLEAR:
                        DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
                        ret = r300_emit_clear(dev_priv, cmdbuf);
                        if (ret) {
                                DRM_ERROR("r300_emit_clear failed\n");
                                return ret;
                        }
                        break;

                case R300_CMD_PACKET3_RAW:
                        DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
                        ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
                        if (ret) {
                                DRM_ERROR("r300_emit_raw_packet3 failed\n");
                                return ret;
                        }
                        break;

                default:
                        DRM_ERROR("bad packet3 type %i at %p\n",
                                  header.packet3.packet,
                                  cmdbuf->buf - sizeof(header));
                        return -EINVAL;
                }

                n += R300_SIMULTANEOUS_CLIPRECTS;
        } while (n < cmdbuf->nbox);

        return 0;
}

/* Some of the R300 chips seem to be extremely touchy about the two registers
 * that are configured in r300_pacify.
 * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
 * sends a command buffer that contains only state setting commands and a
 * vertex program/parameter upload sequence, this will eventually lead to a
 * lockup, unless the sequence is bracketed by calls to r300_pacify.
 * So we should take great care to *always* call r300_pacify before
 * *anything* 3D related, and again afterwards. This is what the
 * call bracket in r300_do_cp_cmdbuf is for.
 */

/**
 * Emit the sequence to pacify R300.
 */
static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
{
        uint32_t cache_z, cache_3d, cache_2d;
        RING_LOCALS;

        cache_z = R300_ZC_FLUSH;
        cache_2d = R300_DC_FLUSH_2D;
        cache_3d = R300_DC_FLUSH_3D;
        if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
                /* we can purge; primitives were drawn since the last purge */
                cache_z |= R300_ZC_FREE;
                cache_2d |= R300_DC_FREE_2D;
                cache_3d |= R300_DC_FREE_3D;
        }

        /* flush & purge zbuffer */
        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
        OUT_RING(cache_z);
        ADVANCE_RING();
        /* flush & purge 3d */
        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        OUT_RING(cache_3d);
        ADVANCE_RING();
        /* flush & purge texture */
        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
        OUT_RING(0);
        ADVANCE_RING();
        /* FIXME: is this one really needed ? */
        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
        OUT_RING(0);
        ADVANCE_RING();
        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
        OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
        ADVANCE_RING();
        /* flush & purge 2d through E2 as RB2D will trigger lockup */
        BEGIN_RING(4);
        OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
        OUT_RING(cache_2d);
        OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
        OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
                 RADEON_WAIT_HOST_IDLECLEAN);
        ADVANCE_RING();
        /* set flush & purge flags */
        dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
}

/**
 * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
 * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
 * be careful about how this function is called.
 */
static void r300_discard_buffer(struct drm_device * dev, struct drm_master *master, struct drm_buf * buf)
{
        drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
        struct drm_radeon_master_private *master_priv = master->driver_priv;

        buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
        buf->pending = 1;
        buf->used = 0;
}

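/**
 * Emit a WAIT_UNTIL packet matching the wait flags of the given command
 * header.
 * Called by r300_do_cp_cmdbuf.
 */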
static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
                          drm_r300_cmd_header_t header)
{
        u32 wait_until;
        RING_LOCALS;

        if (!header.wait.flags)
                return;

        wait_until = 0;

        switch (header.wait.flags) {
        case R300_WAIT_2D:
                wait_until = RADEON_WAIT_2D_IDLE;
                break;
        case R300_WAIT_3D:
                wait_until = RADEON_WAIT_3D_IDLE;
                break;
        case R300_NEW_WAIT_2D_3D:
                wait_until = RADEON_WAIT_2D_IDLE | RADEON_WAIT_3D_IDLE;
                break;
        case R300_NEW_WAIT_2D_2D_CLEAN:
                wait_until = RADEON_WAIT_2D_IDLE | RADEON_WAIT_2D_IDLECLEAN;
                break;
        case R300_NEW_WAIT_3D_3D_CLEAN:
                wait_until = RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
                break;
        case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
                wait_until = RADEON_WAIT_2D_IDLE | RADEON_WAIT_2D_IDLECLEAN;
                wait_until |= RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
                break;
        default:
                return;
        }

        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
        OUT_RING(wait_until);
        ADVANCE_RING();
}

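/**
 * Handle an R300_CMD_SCRATCH command: bump the age of the selected scratch
 * register, write it into the user-supplied reference area for each listed
 * buffer, decrement the per-buffer pending count, and emit the new age to
 * the scratch register.
 * Called by r300_do_cp_cmdbuf.
 */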
static int r300_scratch(drm_radeon_private_t *dev_priv,
                        drm_radeon_kcmd_buffer_t *cmdbuf,
                        drm_r300_cmd_header_t header)
{
        u32 *ref_age_base;
        u32 i, buf_idx, h_pending;
        RING_LOCALS;

        if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx)) {
                return -EINVAL;
        }

        if (header.scratch.reg >= 5) {
                return -EINVAL;
        }

        dev_priv->scratch_ages[header.scratch.reg]++;

        ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);

        cmdbuf->buf += sizeof(uint64_t);
        cmdbuf->bufsz -= sizeof(uint64_t);

        for (i = 0; i < header.scratch.n_bufs; i++) {
                buf_idx = *(u32 *)cmdbuf->buf;
                buf_idx *= 2; /* 8 bytes per buf */

                if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
                        return -EINVAL;
                }

                if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
                        return -EINVAL;
                }

                if (h_pending == 0) {
                        return -EINVAL;
                }

                h_pending--;

                if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
                        return -EINVAL;
                }

                cmdbuf->buf += sizeof(buf_idx);
                cmdbuf->bufsz -= sizeof(buf_idx);
        }

        BEGIN_RING(2);
        OUT_RING(CP_PACKET0(RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0));
        OUT_RING(dev_priv->scratch_ages[header.scratch.reg]);
        ADVANCE_RING();

        return 0;
}

/**
 * Uploads user-supplied fragment program instructions or constants onto
 * an R5xx graphics card.
 * Called by r300_do_cp_cmdbuf.
 */
static __inline__ int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
                                       drm_radeon_kcmd_buffer_t *cmdbuf,
                                       drm_r300_cmd_header_t header)
{
        int sz;
        int addr;
        int type;
        int clamp;
        int stride;
        RING_LOCALS;

        sz = header.r500fp.count;
        /* address is 9 bits 0 - 8, bit 1 of flags is part of address */
        addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;

        type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
        clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);

        addr |= (type << 16);
        addr |= (clamp << 17);

        stride = type ? 4 : 6;

        DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
        if (!sz)
                return 0;
        if (sz * stride * 4 > cmdbuf->bufsz)
                return -EINVAL;

        BEGIN_RING(3 + sz * stride);
        OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
        OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
        OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride);

        ADVANCE_RING();

        cmdbuf->buf += sz * stride * 4;
        cmdbuf->bufsz -= sz * stride * 4;

        return 0;
}


/**
 * Parses and validates a user-supplied command buffer and emits appropriate
 * commands on the DMA ring buffer.
 * Called by the ioctl handler function radeon_cp_cmdbuf.
 */
int r300_do_cp_cmdbuf(struct drm_device *dev,
                      struct drm_file *file_priv,
                      drm_radeon_kcmd_buffer_t *cmdbuf)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
        struct drm_device_dma *dma = dev->dma;
        struct drm_buf *buf = NULL;
        int emit_dispatch_age = 0;
        int ret = 0;

        DRM_DEBUG("\n");

        /* pacify */
        r300_pacify(dev_priv);

        if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
                ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
                if (ret)
                        goto cleanup;
        }

        while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
                int idx;
                drm_r300_cmd_header_t header;

                header.u = *(unsigned int *)cmdbuf->buf;

                cmdbuf->buf += sizeof(header);
                cmdbuf->bufsz -= sizeof(header);

                switch (header.header.cmd_type) {
                case R300_CMD_PACKET0:
                        ret = r300_emit_packet0(dev_priv, cmdbuf, header);
                        if (ret) {
                                DRM_ERROR("r300_emit_packet0 failed\n");
                                goto cleanup;
                        }
                        break;

                case R300_CMD_VPU:
                        DRM_DEBUG("R300_CMD_VPU\n");
                        ret = r300_emit_vpu(dev_priv, cmdbuf, header);
                        if (ret) {
                                DRM_ERROR("r300_emit_vpu failed\n");
                                goto cleanup;
                        }
                        break;

                case R300_CMD_PACKET3:
                        DRM_DEBUG("R300_CMD_PACKET3\n");
                        ret = r300_emit_packet3(dev_priv, cmdbuf, header);
                        if (ret) {
                                DRM_ERROR("r300_emit_packet3 failed\n");
                                goto cleanup;
                        }
                        break;

                case R300_CMD_END3D:
                        DRM_DEBUG("R300_CMD_END3D\n");
                        /* TODO:
                           Ideally the userspace driver should not need to issue this call,
                           i.e. the drm driver should issue it automatically and prevent
                           lockups.

                           In practice, we do not understand why this call is needed and what
                           it does (except for some vague guesses that it has to do with cache
                           coherence) and so the userspace driver does it.

                           Once we are sure which uses prevent lockups, the code could be moved
                           into the kernel and the userspace driver would no longer
                           need to use this command.

                           Note that issuing this command does not hurt anything
                           except, possibly, performance */
                        r300_pacify(dev_priv);
                        break;

                case R300_CMD_CP_DELAY:
                        /* simple enough, we can do it here */
                        DRM_DEBUG("R300_CMD_CP_DELAY\n");
                        {
                                int i;
                                RING_LOCALS;

                                BEGIN_RING(header.delay.count);
                                for (i = 0; i < header.delay.count; i++)
                                        OUT_RING(RADEON_CP_PACKET2);
                                ADVANCE_RING();
                        }
                        break;

                case R300_CMD_DMA_DISCARD:
                        DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
                        idx = header.dma.buf_idx;
                        if (idx < 0 || idx >= dma->buf_count) {
                                DRM_ERROR("buffer index %d (of %d max)\n",
                                          idx, dma->buf_count - 1);
                                ret = -EINVAL;
                                goto cleanup;
                        }

                        buf = dma->buflist[idx];
                        if (buf->file_priv != file_priv || buf->pending) {
                                DRM_ERROR("bad buffer %p %p %d\n",
                                          buf->file_priv, file_priv,
                                          buf->pending);
                                ret = -EINVAL;
                                goto cleanup;
                        }

                        emit_dispatch_age = 1;
                        r300_discard_buffer(dev, file_priv->master, buf);
                        break;

                case R300_CMD_WAIT:
                        DRM_DEBUG("R300_CMD_WAIT\n");
                        r300_cmd_wait(dev_priv, header);
                        break;

                case R300_CMD_SCRATCH:
                        DRM_DEBUG("R300_CMD_SCRATCH\n");
                        ret = r300_scratch(dev_priv, cmdbuf, header);
                        if (ret) {
                                DRM_ERROR("r300_scratch failed\n");
                                goto cleanup;
                        }
                        break;

                case R300_CMD_R500FP:
                        if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
                                DRM_ERROR("Calling r500 command on r300 card\n");
                                ret = -EINVAL;
                                goto cleanup;
                        }
                        DRM_DEBUG("R300_CMD_R500FP\n");
                        ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
                        if (ret) {
                                DRM_ERROR("r300_emit_r500fp failed\n");
                                goto cleanup;
                        }
                        break;
                default:
                        DRM_ERROR("bad cmd_type %i at %p\n",
                                  header.header.cmd_type,
                                  cmdbuf->buf - sizeof(header));
                        ret = -EINVAL;
                        goto cleanup;
                }
        }

        DRM_DEBUG("END\n");

      cleanup:
        r300_pacify(dev_priv);

        /* We emit the vertex buffer age here, outside the pacifier "brackets"
         * for two reasons:
         *  (1) This may coalesce multiple age emissions into a single one and
         *  (2) more importantly, some chips lock up hard when scratch registers
         *      are written inside the pacifier bracket.
         */
        if (emit_dispatch_age) {
                RING_LOCALS;

                /* Emit the vertex buffer age */
                BEGIN_RING(2);
                RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch);
                ADVANCE_RING();
        }

        COMMIT_RING();

        return ret;
}