OSDN Git Service

Unify radeon offset checking.
[android-x86/external-libdrm.git] / shared-core / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include "drmP.h"
35 #include "drm.h"
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39
40 #define R300_SIMULTANEOUS_CLIPRECTS             4
41
/*
 * Value written to R300_RE_CLIPRECT_CNTL for a given number of active
 * cliprects.  The table is indexed by (number of cliprects - 1), so it
 * has one entry for each of the 1..4 cliprects that can be programmed
 * at once.
 */
static const int r300_cliprect_cntl[4] = {
        0xAAAA,                 /* 1 cliprect  */
        0xEEEE,                 /* 2 cliprects */
        0xFEFE,                 /* 3 cliprects */
        0xFFFE                  /* 4 cliprects */
};
50
51 /**
52  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
53  * buffer, starting with index n.
54  */
55 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
56                                drm_radeon_kcmd_buffer_t *cmdbuf, int n)
57 {
58         drm_clip_rect_t box;
59         int nr;
60         int i;
61         RING_LOCALS;
62
63         nr = cmdbuf->nbox - n;
64         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
65                 nr = R300_SIMULTANEOUS_CLIPRECTS;
66
67         DRM_DEBUG("%i cliprects\n", nr);
68
69         if (nr) {
70                 BEGIN_RING(6 + nr * 2);
71                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
72
73                 for (i = 0; i < nr; ++i) {
74                         if (DRM_COPY_FROM_USER_UNCHECKED
75                             (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
76                                 DRM_ERROR("copy cliprect faulted\n");
77                                 return DRM_ERR(EFAULT);
78                         }
79
80                         box.x1 =
81                             (box.x1 +
82                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
83                         box.y1 =
84                             (box.y1 +
85                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
86                         box.x2 =
87                             (box.x2 +
88                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
89                         box.y2 =
90                             (box.y2 +
91                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
92
93                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
94                                  (box.y1 << R300_CLIPRECT_Y_SHIFT));
95                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
96                                  (box.y2 << R300_CLIPRECT_Y_SHIFT));
97                 }
98
99                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
100
101                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
102                  * client might be able to trample over memory.
103                  * The impact should be very limited, but I'd rather be safe than
104                  * sorry.
105                  */
106                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
107                 OUT_RING(0);
108                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
109                 ADVANCE_RING();
110         } else {
111                 /* Why we allow zero cliprect rendering:
112                  * There are some commands in a command buffer that must be submitted
113                  * even when there are no cliprects, e.g. DMA buffer discard
114                  * or state setting (though state setting could be avoided by
115                  * simulating a loss of context).
116                  *
117                  * Now since the cmdbuf interface is so chaotic right now (and is
118                  * bound to remain that way for a bit until things settle down),
119                  * it is basically impossible to filter out the commands that are
120                  * necessary and those that aren't.
121                  *
122                  * So I choose the safe way and don't do any filtering at all;
123                  * instead, I simply set up the engine so that all rendering
124                  * can't produce any fragments.
125                  */
126                 BEGIN_RING(2);
127                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
128                 ADVANCE_RING();
129         }
130
131         return 0;
132 }
133
134 static u8 r300_reg_flags[0x10000 >> 2];
135
136 void r300_init_reg_flags(void)
137 {
138         int i;
139         memset(r300_reg_flags, 0, 0x10000 >> 2);
140 #define ADD_RANGE_MARK(reg, count,mark) \
141                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
142                         r300_reg_flags[i]|=(mark);
143
144 #define MARK_SAFE               1
145 #define MARK_CHECK_OFFSET       2
146
147 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
148
149         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
150         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
151         ADD_RANGE(0x2080, 1);
152         ADD_RANGE(R300_SE_VTE_CNTL, 2);
153         ADD_RANGE(0x2134, 2);
154         ADD_RANGE(0x2140, 1);
155         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
156         ADD_RANGE(0x21DC, 1);
157         ADD_RANGE(0x221C, 1);
158         ADD_RANGE(0x2220, 4);
159         ADD_RANGE(0x2288, 1);
160         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
161         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
162         ADD_RANGE(R300_GB_ENABLE, 1);
163         ADD_RANGE(R300_GB_MSPOS0, 5);
164         ADD_RANGE(R300_TX_CNTL, 1);
165         ADD_RANGE(R300_TX_ENABLE, 1);
166         ADD_RANGE(0x4200, 4);
167         ADD_RANGE(0x4214, 1);
168         ADD_RANGE(R300_RE_POINTSIZE, 1);
169         ADD_RANGE(0x4230, 3);
170         ADD_RANGE(R300_RE_LINE_CNT, 1);
171         ADD_RANGE(0x4238, 1);
172         ADD_RANGE(0x4260, 3);
173         ADD_RANGE(0x4274, 4);
174         ADD_RANGE(0x4288, 5);
175         ADD_RANGE(0x42A0, 1);
176         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
177         ADD_RANGE(0x42B4, 1);
178         ADD_RANGE(R300_RE_CULL_CNTL, 1);
179         ADD_RANGE(0x42C0, 2);
180         ADD_RANGE(R300_RS_CNTL_0, 2);
181         ADD_RANGE(R300_RS_INTERP_0, 8);
182         ADD_RANGE(R300_RS_ROUTE_0, 8);
183         ADD_RANGE(0x43A4, 2);
184         ADD_RANGE(0x43E8, 1);
185         ADD_RANGE(R300_PFS_CNTL_0, 3);
186         ADD_RANGE(R300_PFS_NODE_0, 4);
187         ADD_RANGE(R300_PFS_TEXI_0, 64);
188         ADD_RANGE(0x46A4, 5);
189         ADD_RANGE(R300_PFS_INSTR0_0, 64);
190         ADD_RANGE(R300_PFS_INSTR1_0, 64);
191         ADD_RANGE(R300_PFS_INSTR2_0, 64);
192         ADD_RANGE(R300_PFS_INSTR3_0, 64);
193         ADD_RANGE(0x4BC0, 1);
194         ADD_RANGE(0x4BC8, 3);
195         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
196         ADD_RANGE(0x4BD8, 1);
197         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
198         ADD_RANGE(0x4E00, 1);
199         ADD_RANGE(R300_RB3D_CBLEND, 2);
200         ADD_RANGE(R300_RB3D_COLORMASK, 1);
201         ADD_RANGE(0x4E10, 3);
202         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
203         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
204         ADD_RANGE(0x4E50, 9);
205         ADD_RANGE(0x4E88, 1);
206         ADD_RANGE(0x4EA0, 2);
207         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
208         ADD_RANGE(0x4F10, 4);
209         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);    /* check offset */
210         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
211         ADD_RANGE(0x4F28, 1);
212         ADD_RANGE(0x4F30, 2);
213         ADD_RANGE(0x4F44, 1);
214         ADD_RANGE(0x4F54, 1);
215
216         ADD_RANGE(R300_TX_FILTER_0, 16);
217         ADD_RANGE(R300_TX_FILTER1_0, 16);
218         ADD_RANGE(R300_TX_SIZE_0, 16);
219         ADD_RANGE(R300_TX_FORMAT_0, 16);
220         ADD_RANGE(R300_TX_PITCH_0, 16);
221         /* Texture offset is dangerous and needs more checking */
222         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
223         ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
224         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
225
226         /* Sporadic registers used as primitives are emitted */
227         ADD_RANGE(0x4f18, 1);
228         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
229         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
230         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
231
232 }
233
234 static __inline__ int r300_check_range(unsigned reg, int count)
235 {
236         int i;
237         if (reg & ~0xffff)
238                 return -1;
239         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
240                 if (r300_reg_flags[i] != MARK_SAFE)
241                         return 1;
242         return 0;
243 }
244
245 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
246                                                           dev_priv,
247                                                           drm_radeon_kcmd_buffer_t
248                                                           * cmdbuf,
249                                                           drm_r300_cmd_header_t
250                                                           header)
251 {
252         int reg;
253         int sz;
254         int i;
255         int values[64];
256         RING_LOCALS;
257
258         sz = header.packet0.count;
259         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
260
261         if ((sz > 64) || (sz < 0)) {
262                 DRM_ERROR
263                     ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
264                      reg, sz);
265                 return DRM_ERR(EINVAL);
266         }
267         for (i = 0; i < sz; i++) {
268                 values[i] = ((int *)cmdbuf->buf)[i];
269                 switch (r300_reg_flags[(reg >> 2) + i]) {
270                 case MARK_SAFE:
271                         break;
272                 case MARK_CHECK_OFFSET:
273                         if (!radeon_check_offset(dev_priv, (u32) values[i])) {
274                                 DRM_ERROR
275                                     ("Offset failed range check (reg=%04x sz=%d)\n",
276                                      reg, sz);
277                                 return DRM_ERR(EINVAL);
278                         }
279                         break;
280                 default:
281                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
282                                   reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
283                         return DRM_ERR(EINVAL);
284                 }
285         }
286
287         BEGIN_RING(1 + sz);
288         OUT_RING(CP_PACKET0(reg, sz - 1));
289         OUT_RING_TABLE(values, sz);
290         ADVANCE_RING();
291
292         cmdbuf->buf += sz * 4;
293         cmdbuf->bufsz -= sz * 4;
294
295         return 0;
296 }
297
298 /**
299  * Emits a packet0 setting arbitrary registers.
300  * Called by r300_do_cp_cmdbuf.
301  *
302  * Note that checks are performed on contents and addresses of the registers
303  */
304 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
305                                         drm_radeon_kcmd_buffer_t *cmdbuf,
306                                         drm_r300_cmd_header_t header)
307 {
308         int reg;
309         int sz;
310         RING_LOCALS;
311
312         sz = header.packet0.count;
313         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
314
315         if (!sz)
316                 return 0;
317
318         if (sz * 4 > cmdbuf->bufsz)
319                 return DRM_ERR(EINVAL);
320
321         if (reg + sz * 4 >= 0x10000) {
322                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
323                           sz);
324                 return DRM_ERR(EINVAL);
325         }
326
327         if (r300_check_range(reg, sz)) {
328                 /* go and check everything */
329                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
330                                                            header);
331         }
332         /* the rest of the data is safe to emit, whatever the values the user passed */
333
334         BEGIN_RING(1 + sz);
335         OUT_RING(CP_PACKET0(reg, sz - 1));
336         OUT_RING_TABLE((int *)cmdbuf->buf, sz);
337         ADVANCE_RING();
338
339         cmdbuf->buf += sz * 4;
340         cmdbuf->bufsz -= sz * 4;
341
342         return 0;
343 }
344
345 /**
346  * Uploads user-supplied vertex program instructions or parameters onto
347  * the graphics card.
348  * Called by r300_do_cp_cmdbuf.
349  */
350 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
351                                     drm_radeon_kcmd_buffer_t *cmdbuf,
352                                     drm_r300_cmd_header_t header)
353 {
354         int sz;
355         int addr;
356         RING_LOCALS;
357
358         sz = header.vpu.count;
359         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
360
361         if (!sz)
362                 return 0;
363         if (sz * 16 > cmdbuf->bufsz)
364                 return DRM_ERR(EINVAL);
365
366         BEGIN_RING(5 + sz * 4);
367         /* Wait for VAP to come to senses.. */
368         /* there is no need to emit it multiple times, (only once before VAP is programmed,
369            but this optimization is for later */
370         OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
371         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
372         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
373         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
374
375         ADVANCE_RING();
376
377         cmdbuf->buf += sz * 16;
378         cmdbuf->bufsz -= sz * 16;
379
380         return 0;
381 }
382
383 /**
384  * Emit a clear packet from userspace.
385  * Called by r300_emit_packet3.
386  */
387 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
388                                       drm_radeon_kcmd_buffer_t *cmdbuf)
389 {
390         RING_LOCALS;
391
392         if (8 * 4 > cmdbuf->bufsz)
393                 return DRM_ERR(EINVAL);
394
395         BEGIN_RING(10);
396         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
397         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
398                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
399         OUT_RING_TABLE((int *)cmdbuf->buf, 8);
400         ADVANCE_RING();
401
402         cmdbuf->buf += 8 * 4;
403         cmdbuf->bufsz -= 8 * 4;
404
405         return 0;
406 }
407
408 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
409                                                drm_radeon_kcmd_buffer_t *cmdbuf,
410                                                u32 header)
411 {
412         int count, i, k;
413 #define MAX_ARRAY_PACKET  64
414         u32 payload[MAX_ARRAY_PACKET];
415         u32 narrays;
416         RING_LOCALS;
417
418         count = (header >> 16) & 0x3fff;
419
420         if ((count + 1) > MAX_ARRAY_PACKET) {
421                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
422                           count);
423                 return DRM_ERR(EINVAL);
424         }
425         memset(payload, 0, MAX_ARRAY_PACKET * 4);
426         memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
427
428         /* carefully check packet contents */
429
430         narrays = payload[0];
431         k = 0;
432         i = 1;
433         while ((k < narrays) && (i < (count + 1))) {
434                 i++;            /* skip attribute field */
435                 if (!radeon_check_offset(dev_priv, payload[i])) {
436                         DRM_ERROR
437                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
438                              k, i);
439                         return DRM_ERR(EINVAL);
440                 }
441                 k++;
442                 i++;
443                 if (k == narrays)
444                         break;
445                 /* have one more to process, they come in pairs */
446                 if (!radeon_check_offset(dev_priv, payload[i])) {
447                         DRM_ERROR
448                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
449                              k, i);
450                         return DRM_ERR(EINVAL);
451                 }
452                 k++;
453                 i++;
454         }
455         /* do the counts match what we expect ? */
456         if ((k != narrays) || (i != (count + 1))) {
457                 DRM_ERROR
458                     ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
459                      k, i, narrays, count + 1);
460                 return DRM_ERR(EINVAL);
461         }
462
463         /* all clear, output packet */
464
465         BEGIN_RING(count + 2);
466         OUT_RING(header);
467         OUT_RING_TABLE(payload, count + 1);
468         ADVANCE_RING();
469
470         cmdbuf->buf += (count + 2) * 4;
471         cmdbuf->bufsz -= (count + 2) * 4;
472
473         return 0;
474 }
475
476 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
477                                              drm_radeon_kcmd_buffer_t *cmdbuf)
478 {
479         u32 *cmd = (u32 *) cmdbuf->buf;
480         int count, ret;
481         RING_LOCALS;
482
483         count=(cmd[0]>>16) & 0x3fff;
484
485         if (cmd[0] & 0x8000) {
486                 u32 offset;
487
488                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL 
489                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
490                         offset = cmd[2] << 10;
491                         ret = !radeon_check_offset(dev_priv, offset);
492                         if (ret) {
493                                 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
494                                 return DRM_ERR(EINVAL);
495                         }
496                 }
497
498                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
499                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
500                         offset = cmd[3] << 10;
501                         ret = !radeon_check_offset(dev_priv, offset);
502                         if (ret) {
503                                 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
504                                 return DRM_ERR(EINVAL);
505                         }
506                         
507                 }
508         }
509
510         BEGIN_RING(count+2);
511         OUT_RING(cmd[0]);
512         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
513         ADVANCE_RING();
514
515         cmdbuf->buf += (count+2)*4;
516         cmdbuf->bufsz -= (count+2)*4;
517
518         return 0;
519 }
520
521 static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
522                                              drm_radeon_kcmd_buffer_t *cmdbuf)
523 {
524         u32 *cmd = (u32 *) cmdbuf->buf;
525         int count, ret;
526         RING_LOCALS;
527
528         count=(cmd[0]>>16) & 0x3fff;
529
530         if ((cmd[1] & 0x8000ffff) != 0x80000810) {
531                 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
532                 return DRM_ERR(EINVAL);
533         }
534         ret = !radeon_check_offset(dev_priv, cmd[2]);
535         if (ret) {
536                 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
537                 return DRM_ERR(EINVAL);
538         }
539
540         BEGIN_RING(count+2);
541         OUT_RING(cmd[0]);
542         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
543         ADVANCE_RING();
544
545         cmdbuf->buf += (count+2)*4;
546         cmdbuf->bufsz -= (count+2)*4;
547
548         return 0;
549 }
550
551 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
552                                             drm_radeon_kcmd_buffer_t *cmdbuf)
553 {
554         u32 header;
555         int count;
556         RING_LOCALS;
557
558         if (4 > cmdbuf->bufsz)
559                 return DRM_ERR(EINVAL);
560
561         /* Fixme !! This simply emits a packet without much checking.
562            We need to be smarter. */
563
564         /* obtain first word - actual packet3 header */
565         header = *(u32 *) cmdbuf->buf;
566
567         /* Is it packet 3 ? */
568         if ((header >> 30) != 0x3) {
569                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
570                 return DRM_ERR(EINVAL);
571         }
572
573         count = (header >> 16) & 0x3fff;
574
575         /* Check again now that we know how much data to expect */
576         if ((count + 2) * 4 > cmdbuf->bufsz) {
577                 DRM_ERROR
578                     ("Expected packet3 of length %d but have only %d bytes left\n",
579                      (count + 2) * 4, cmdbuf->bufsz);
580                 return DRM_ERR(EINVAL);
581         }
582
583         /* Is it a packet type we know about ? */
584         switch (header & 0xff00) {
585         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
586                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
587
588         case RADEON_CNTL_BITBLT_MULTI:
589                 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
590
591         case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
592                 return r300_emit_indx_buffer(dev_priv, cmdbuf);
593         case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
594         case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
595         case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
596         case RADEON_WAIT_FOR_IDLE:
597         case RADEON_CP_NOP:
598                 /* these packets are safe */
599                 break;
600         default:
601                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
602                 return DRM_ERR(EINVAL);
603         }
604
605         BEGIN_RING(count + 2);
606         OUT_RING(header);
607         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
608         ADVANCE_RING();
609
610         cmdbuf->buf += (count + 2) * 4;
611         cmdbuf->bufsz -= (count + 2) * 4;
612
613         return 0;
614 }
615
616 /**
617  * Emit a rendering packet3 from userspace.
618  * Called by r300_do_cp_cmdbuf.
619  */
620 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
621                                         drm_radeon_kcmd_buffer_t *cmdbuf,
622                                         drm_r300_cmd_header_t header)
623 {
624         int n;
625         int ret;
626         char *orig_buf = cmdbuf->buf;
627         int orig_bufsz = cmdbuf->bufsz;
628
629         /* This is a do-while-loop so that we run the interior at least once,
630          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
631          */
632         n = 0;
633         do {
634                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
635                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
636                         if (ret)
637                                 return ret;
638
639                         cmdbuf->buf = orig_buf;
640                         cmdbuf->bufsz = orig_bufsz;
641                 }
642
643                 switch (header.packet3.packet) {
644                 case R300_CMD_PACKET3_CLEAR:
645                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
646                         ret = r300_emit_clear(dev_priv, cmdbuf);
647                         if (ret) {
648                                 DRM_ERROR("r300_emit_clear failed\n");
649                                 return ret;
650                         }
651                         break;
652
653                 case R300_CMD_PACKET3_RAW:
654                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
655                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
656                         if (ret) {
657                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
658                                 return ret;
659                         }
660                         break;
661
662                 default:
663                         DRM_ERROR("bad packet3 type %i at %p\n",
664                                   header.packet3.packet,
665                                   cmdbuf->buf - sizeof(header));
666                         return DRM_ERR(EINVAL);
667                 }
668
669                 n += R300_SIMULTANEOUS_CLIPRECTS;
670         } while (n < cmdbuf->nbox);
671
672         return 0;
673 }
674
675 /* Some of the R300 chips seem to be extremely touchy about the two registers
676  * that are configured in r300_pacify.
677  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
678  * sends a command buffer that contains only state setting commands and a
679  * vertex program/parameter upload sequence, this will eventually lead to a
680  * lockup, unless the sequence is bracketed by calls to r300_pacify.
681  * So we should take great care to *always* call r300_pacify before
682  * *anything* 3D related, and again afterwards. This is what the
683  * call bracket in r300_do_cp_cmdbuf is for.
684  */
685
686 /**
687  * Emit the sequence to pacify R300.
688  */
689 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
690 {
691         RING_LOCALS;
692
693         BEGIN_RING(6);
694         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
695         OUT_RING(0xa);
696         OUT_RING(CP_PACKET0(0x4f18, 0));
697         OUT_RING(0x3);
698         OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
699         OUT_RING(0x0);
700         ADVANCE_RING();
701 }
702
703 /**
704  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
705  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
706  * be careful about how this function is called.
707  */
708 static void r300_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
709 {
710         drm_radeon_private_t *dev_priv = dev->dev_private;
711         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
712
713         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
714         buf->pending = 1;
715         buf->used = 0;
716 }
717
718 static int r300_scratch(drm_radeon_private_t *dev_priv,
719                         drm_radeon_kcmd_buffer_t *cmdbuf,
720                         drm_r300_cmd_header_t header)
721 {
722         u32 *ref_age_base;
723         u32 i, buf_idx, h_pending;
724         RING_LOCALS;
725         
726         if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) ) {
727                 return DRM_ERR(EINVAL);
728         }
729         
730         if (header.scratch.reg >= 5) {
731                 return DRM_ERR(EINVAL);
732         }
733         
734         dev_priv->scratch_ages[header.scratch.reg] ++;
735         
736         ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
737         
738         cmdbuf->buf += sizeof(uint64_t);
739         cmdbuf->bufsz -= sizeof(uint64_t);
740         
741         for (i=0; i < header.scratch.n_bufs; i++) {
742                 buf_idx = *(u32 *)cmdbuf->buf;
743                 buf_idx *= 2; /* 8 bytes per buf */
744                 
745                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
746                         return DRM_ERR(EINVAL);
747                 }
748                                         
749                 if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
750                         return DRM_ERR(EINVAL);
751                 }
752                                         
753                 if (h_pending == 0) {
754                         return DRM_ERR(EINVAL);
755                 }
756                                         
757                 h_pending--;
758                                                 
759                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
760                         return DRM_ERR(EINVAL);
761                 }
762                                         
763                 cmdbuf->buf += sizeof(buf_idx);
764                 cmdbuf->bufsz -= sizeof(buf_idx);
765         }
766         
767         BEGIN_RING(2);
768         OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
769         OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
770         ADVANCE_RING();
771         
772         return 0;
773 }
774
775 /**
776  * Parses and validates a user-supplied command buffer and emits appropriate
777  * commands on the DMA ring buffer.
778  * Called by the ioctl handler function radeon_cp_cmdbuf.
779  */
780 int r300_do_cp_cmdbuf(drm_device_t *dev,
781                       DRMFILE filp,
782                       drm_file_t *filp_priv,
783                       drm_radeon_kcmd_buffer_t *cmdbuf)
784 {
785         drm_radeon_private_t *dev_priv = dev->dev_private;
786         drm_device_dma_t *dma = dev->dma;
787         drm_buf_t *buf = NULL;
788         int emit_dispatch_age = 0;
789         int ret = 0;
790
791         DRM_DEBUG("\n");
792
793         /* See the comment above r300_emit_begin3d for why this call must be here,
794          * and what the cleanup gotos are for. */
795         r300_pacify(dev_priv);
796
797         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
798                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
799                 if (ret)
800                         goto cleanup;
801         }
802
803         while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
804                 int idx;
805                 drm_r300_cmd_header_t header;
806
807                 header.u = *(unsigned int *)cmdbuf->buf;
808
809                 cmdbuf->buf += sizeof(header);
810                 cmdbuf->bufsz -= sizeof(header);
811
812                 switch (header.header.cmd_type) {
813                 case R300_CMD_PACKET0:
814                         DRM_DEBUG("R300_CMD_PACKET0\n");
815                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
816                         if (ret) {
817                                 DRM_ERROR("r300_emit_packet0 failed\n");
818                                 goto cleanup;
819                         }
820                         break;
821
822                 case R300_CMD_VPU:
823                         DRM_DEBUG("R300_CMD_VPU\n");
824                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
825                         if (ret) {
826                                 DRM_ERROR("r300_emit_vpu failed\n");
827                                 goto cleanup;
828                         }
829                         break;
830
831                 case R300_CMD_PACKET3:
832                         DRM_DEBUG("R300_CMD_PACKET3\n");
833                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
834                         if (ret) {
835                                 DRM_ERROR("r300_emit_packet3 failed\n");
836                                 goto cleanup;
837                         }
838                         break;
839
840                 case R300_CMD_END3D:
841                         DRM_DEBUG("R300_CMD_END3D\n");
842                         /* TODO:
843                            Ideally userspace driver should not need to issue this call,
844                            i.e. the drm driver should issue it automatically and prevent
845                            lockups.
846
847                            In practice, we do not understand why this call is needed and what
848                            it does (except for some vague guesses that it has to do with cache
849                            coherence) and so the user space driver does it.
850
851                            Once we are sure which uses prevent lockups the code could be moved
852                            into the kernel and the userspace driver will not
853                            need to use this command.
854
855                            Note that issuing this command does not hurt anything
856                            except, possibly, performance */
857                         r300_pacify(dev_priv);
858                         break;
859
860                 case R300_CMD_CP_DELAY:
861                         /* simple enough, we can do it here */
862                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
863                         {
864                                 int i;
865                                 RING_LOCALS;
866
867                                 BEGIN_RING(header.delay.count);
868                                 for (i = 0; i < header.delay.count; i++)
869                                         OUT_RING(RADEON_CP_PACKET2);
870                                 ADVANCE_RING();
871                         }
872                         break;
873
874                 case R300_CMD_DMA_DISCARD:
875                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
876                         idx = header.dma.buf_idx;
877                         if (idx < 0 || idx >= dma->buf_count) {
878                                 DRM_ERROR("buffer index %d (of %d max)\n",
879                                           idx, dma->buf_count - 1);
880                                 ret = DRM_ERR(EINVAL);
881                                 goto cleanup;
882                         }
883
884                         buf = dma->buflist[idx];
885                         if (buf->filp != filp || buf->pending) {
886                                 DRM_ERROR("bad buffer %p %p %d\n",
887                                           buf->filp, filp, buf->pending);
888                                 ret = DRM_ERR(EINVAL);
889                                 goto cleanup;
890                         }
891
892                         emit_dispatch_age = 1;
893                         r300_discard_buffer(dev, buf);
894                         break;
895
896                 case R300_CMD_WAIT:
897                         /* simple enough, we can do it here */
898                         DRM_DEBUG("R300_CMD_WAIT\n");
899                         if (header.wait.flags == 0)
900                                 break;  /* nothing to do */
901
902                         {
903                                 RING_LOCALS;
904
905                                 BEGIN_RING(2);
906                                 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
907                                 OUT_RING((header.wait.flags & 0xf) << 14);
908                                 ADVANCE_RING();
909                         }
910                         break;
911
912                 case R300_CMD_SCRATCH:
913                         DRM_DEBUG("R300_CMD_SCRATCH\n");
914                         ret = r300_scratch(dev_priv, cmdbuf, header);
915                         if (ret) {
916                                 DRM_ERROR("r300_scratch failed\n");
917                                 goto cleanup;
918                         }
919                         break;
920                         
921                 default:
922                         DRM_ERROR("bad cmd_type %i at %p\n",
923                                   header.header.cmd_type,
924                                   cmdbuf->buf - sizeof(header));
925                         ret = DRM_ERR(EINVAL);
926                         goto cleanup;
927                 }
928         }
929
930         DRM_DEBUG("END\n");
931
932       cleanup:
933         r300_pacify(dev_priv);
934
935         /* We emit the vertex buffer age here, outside the pacifier "brackets"
936          * for two reasons:
937          *  (1) This may coalesce multiple age emissions into a single one and
938          *  (2) more importantly, some chips lock up hard when scratch registers
939          *      are written inside the pacifier bracket.
940          */
941         if (emit_dispatch_age) {
942                 RING_LOCALS;
943
944                 /* Emit the vertex buffer age */
945                 BEGIN_RING(2);
946                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
947                 ADVANCE_RING();
948         }
949
950         COMMIT_RING();
951
952         return ret;
953 }