radeon: actualy try to fix the corruption
[android-x86/external-libdrm.git] / shared-core / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include "drmP.h"
35 #include "drm.h"
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39
40 #define R300_SIMULTANEOUS_CLIPRECTS             4
41
42 /* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
43  */
44 static const int r300_cliprect_cntl[4] = {
45         0xAAAA,
46         0xEEEE,
47         0xFEFE,
48         0xFFFE
49 };
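
/* Each entry appears to be a 16-bit truth table indexed by the four
 * "inside cliprect N" bits: r300_cliprect_cntl[nr - 1] accepts a pixel that
 * falls inside any of the first nr cliprects (e.g. 0xFFFE for nr == 4 rejects
 * only the "inside none of them" combination).
 */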
50
51 /**
52  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
53  * buffer, starting with index n.
54  */
55 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
56                                drm_radeon_kcmd_buffer_t *cmdbuf, int n)
57 {
58         struct drm_clip_rect box;
59         int nr;
60         int i;
61         RING_LOCALS;
62
63         nr = cmdbuf->nbox - n;
64         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
65                 nr = R300_SIMULTANEOUS_CLIPRECTS;
66
67         DRM_DEBUG("%i cliprects\n", nr);
68
69         if (nr) {
70                 BEGIN_RING(6 + nr * 2);
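                /* CP_PACKET0(reg, n) is a type-0 CP packet header: the n + 1
                 * dwords that follow are written to consecutive registers
                 * starting at reg, so a count of nr * 2 - 1 covers the TL/BR
                 * register pair of each of the nr cliprects emitted below.
                 */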
71                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
72
73                 for (i = 0; i < nr; ++i) {
74                         if (DRM_COPY_FROM_USER_UNCHECKED
75                             (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
76                                 DRM_ERROR("copy cliprect faulted\n");
77                                 return -EFAULT;
78                         }
79
80                         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
81                                 box.x1 = (box.x1) &
82                                         R300_CLIPRECT_MASK;
83                                 box.y1 = (box.y1) &
84                                         R300_CLIPRECT_MASK;
85                                 box.x2 = (box.x2) &
86                                         R300_CLIPRECT_MASK;
87                                 box.y2 = (box.y2) &
88                                         R300_CLIPRECT_MASK;
89                         } else {
90                                 box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
91                                         R300_CLIPRECT_MASK;
92                                 box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
93                                         R300_CLIPRECT_MASK;
94                                 box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
95                                         R300_CLIPRECT_MASK;
96                                 box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
97                                         R300_CLIPRECT_MASK;
98
99                         }
100                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
101                                  (box.y1 << R300_CLIPRECT_Y_SHIFT));
102                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
103                                  (box.y2 << R300_CLIPRECT_Y_SHIFT));
104
105                 }
106
107                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
108
109                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
110                  * client might be able to trample over memory.
111                  * The impact should be very limited, but I'd rather be safe than
112                  * sorry.
113                  */
114                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
115                 OUT_RING(0);
116                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
117                 ADVANCE_RING();
118         } else {
119                 /* Why we allow zero cliprect rendering:
120                  * There are some commands in a command buffer that must be submitted
121                  * even when there are no cliprects, e.g. DMA buffer discard
122                  * or state setting (though state setting could be avoided by
123                  * simulating a loss of context).
124                  *
125                  * Now since the cmdbuf interface is so chaotic right now (and is
126                  * bound to remain that way for a bit until things settle down),
127                  * it is basically impossible to filter out the commands that are
128                  * necessary and those that aren't.
129                  *
130                  * So I choose the safe way and don't do any filtering at all;
131                  * instead, I simply set up the engine so that no rendering
132                  * can produce any fragments.
133                  */
134                 BEGIN_RING(2);
135                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
136                 ADVANCE_RING();
137         }
138
139         /* flush cache and wait idle clean after cliprect change */
140         BEGIN_RING(2);
141         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
142         OUT_RING(R300_RB3D_DC_FLUSH);
143         ADVANCE_RING();
144         BEGIN_RING(2);
145         OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
146         OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
147         ADVANCE_RING();
148         /* set flush flag */
149         dev_priv->track_flush |= RADEON_FLUSH_EMITED;
150
151         return 0;
152 }
153
154 static u8 r300_reg_flags[0x10000 >> 2];
155
156 void r300_init_reg_flags(struct drm_device *dev)
157 {
158         int i;
159         drm_radeon_private_t *dev_priv = dev->dev_private;
160
161         memset(r300_reg_flags, 0, 0x10000 >> 2);
162 #define ADD_RANGE_MARK(reg, count,mark) \
163                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
164                         r300_reg_flags[i]|=(mark);
165
166 #define MARK_SAFE               1
167 #define MARK_CHECK_OFFSET       2
168
169 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
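
        /* r300_reg_flags holds one flag byte per 32-bit register across the
         * 64KB register aperture (0x10000 >> 2 = 0x4000 entries), indexed by
         * (offset >> 2).  ADD_RANGE(reg, count) marks `count' consecutive
         * registers starting at `reg' as safe for direct packet0 writes;
         * ranges added with MARK_CHECK_OFFSET additionally have every written
         * value validated by radeon_check_offset(), since those registers
         * take GPU addresses.
         */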
170
171         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
172         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
173         ADD_RANGE(R300_VAP_CNTL, 1);
174         ADD_RANGE(R300_SE_VTE_CNTL, 2);
175         ADD_RANGE(0x2134, 2);
176         ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
177         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
178         ADD_RANGE(0x21DC, 1);
179         ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
180         ADD_RANGE(R300_VAP_CLIP_X_0, 4);
181         ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
182         ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
183         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
184         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
185         ADD_RANGE(R300_GB_ENABLE, 1);
186         ADD_RANGE(R300_GB_MSPOS0, 5);
187         ADD_RANGE(R300_TX_INVALTAGS, 1);
188         ADD_RANGE(R300_TX_ENABLE, 1);
189         ADD_RANGE(0x4200, 4);
190         ADD_RANGE(0x4214, 1);
191         ADD_RANGE(R300_RE_POINTSIZE, 1);
192         ADD_RANGE(0x4230, 3);
193         ADD_RANGE(R300_RE_LINE_CNT, 1);
194         ADD_RANGE(R300_RE_UNK4238, 1);
195         ADD_RANGE(0x4260, 3);
196         ADD_RANGE(R300_RE_SHADE, 4);
197         ADD_RANGE(R300_RE_POLYGON_MODE, 5);
198         ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
199         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
200         ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
201         ADD_RANGE(R300_RE_CULL_CNTL, 1);
202         ADD_RANGE(0x42C0, 2);
203         ADD_RANGE(R300_RS_CNTL_0, 2);
204
205         ADD_RANGE(R300_SC_HYPERZ, 2);
206         ADD_RANGE(0x43E8, 1);
207
208         ADD_RANGE(0x46A4, 5);
209
210         ADD_RANGE(R300_RE_FOG_STATE, 1);
211         ADD_RANGE(R300_FOG_COLOR_R, 3);
212         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
213         ADD_RANGE(0x4BD8, 1);
214         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
215         ADD_RANGE(0x4E00, 1);
216         ADD_RANGE(R300_RB3D_CBLEND, 2);
217         ADD_RANGE(R300_RB3D_COLORMASK, 1);
218         ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
219         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
220         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
221         ADD_RANGE(0x4E50, 9);
222         ADD_RANGE(0x4E88, 1);
223         ADD_RANGE(0x4EA0, 2);
224         ADD_RANGE(R300_ZB_CNTL, 3);
225         ADD_RANGE(R300_ZB_FORMAT, 4);
226         ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);      /* check offset */
227         ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
228         ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
229         ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
230
231         ADD_RANGE(R300_TX_FILTER_0, 16);
232         ADD_RANGE(R300_TX_FILTER1_0, 16);
233         ADD_RANGE(R300_TX_SIZE_0, 16);
234         ADD_RANGE(R300_TX_FORMAT_0, 16);
235         ADD_RANGE(R300_TX_PITCH_0, 16);
236         /* Texture offset is dangerous and needs more checking */
237         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
238         ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
239         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
240
241         /* Registers that are written sporadically as primitives are emitted */
242         ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
243         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
244         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
245         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
246
247         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
248                 ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
249                 ADD_RANGE(R500_US_CONFIG, 2);
250                 ADD_RANGE(R500_US_CODE_ADDR, 3);
251                 ADD_RANGE(R500_US_FC_CTRL, 1);
252                 ADD_RANGE(R500_RS_IP_0, 16);
253                 ADD_RANGE(R500_RS_INST_0, 16);
254                 ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
255                 ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
256                 ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
257         } else {
258                 ADD_RANGE(R300_PFS_CNTL_0, 3);
259                 ADD_RANGE(R300_PFS_NODE_0, 4);
260                 ADD_RANGE(R300_PFS_TEXI_0, 64);
261                 ADD_RANGE(R300_PFS_INSTR0_0, 64);
262                 ADD_RANGE(R300_PFS_INSTR1_0, 64);
263                 ADD_RANGE(R300_PFS_INSTR2_0, 64);
264                 ADD_RANGE(R300_PFS_INSTR3_0, 64);
265                 ADD_RANGE(R300_RS_INTERP_0, 8);
266                 ADD_RANGE(R300_RS_ROUTE_0, 8);
267
268         }
269 }
270
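/* Returns -1 if the start offset lies outside the 64KB register window,
 * 1 if any register in [reg, reg + count * 4) is not plainly MARK_SAFE
 * (so the write must go through the carefully checked path), and 0 if the
 * whole range can be emitted as-is.
 */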
271 static __inline__ int r300_check_range(unsigned reg, int count)
272 {
273         int i;
274         if (reg & ~0xffff)
275                 return -1;
276         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
277                 if (r300_reg_flags[i] != MARK_SAFE)
278                         return 1;
279         return 0;
280 }
281
282 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
283                                                           dev_priv,
284                                                           drm_radeon_kcmd_buffer_t
285                                                           * cmdbuf,
286                                                           drm_r300_cmd_header_t
287                                                           header)
288 {
289         int reg;
290         int sz;
291         int i;
292         int values[64];
293         RING_LOCALS;
294
295         sz = header.packet0.count;
296         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
297
298         if ((sz > 64) || (sz < 0)) {
299                 DRM_ERROR
300                     ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
301                      reg, sz);
302                 return -EINVAL;
303         }
304         for (i = 0; i < sz; i++) {
305                 values[i] = ((int *)cmdbuf->buf)[i];
306                 switch (r300_reg_flags[(reg >> 2) + i]) {
307                 case MARK_SAFE:
308                         break;
309                 case MARK_CHECK_OFFSET:
310                         if (!radeon_check_offset(dev_priv, (u32) values[i])) {
311                                 DRM_ERROR
312                                     ("Offset failed range check (reg=%04x sz=%d)\n",
313                                      reg, sz);
314                                 return -EINVAL;
315                         }
316                         break;
317                 default:
318                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
319                                   reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
320                         return -EINVAL;
321                 }
322         }
323
324         BEGIN_RING(1 + sz);
325         OUT_RING(CP_PACKET0(reg, sz - 1));
326         OUT_RING_TABLE(values, sz);
327         ADVANCE_RING();
328
329         cmdbuf->buf += sz * 4;
330         cmdbuf->bufsz -= sz * 4;
331
332         return 0;
333 }
334
335 /**
336  * Emits a packet0 setting arbitrary registers.
337  * Called by r300_do_cp_cmdbuf.
338  *
339  * Note that checks are performed on contents and addresses of the registers
340  */
341 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
342                                         drm_radeon_kcmd_buffer_t *cmdbuf,
343                                         drm_r300_cmd_header_t header)
344 {
345         int reg;
346         int sz;
347         RING_LOCALS;
348
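        /* An R300_CMD_PACKET0 header packs the target register offset into
         * two bytes (reglo = offset bits 0-7, reghi = bits 8-15), and `count'
         * is the number of 32-bit values following the header in the command
         * buffer; they are written to consecutive registers starting at reg.
         */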
349         sz = header.packet0.count;
350         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
351
352         DRM_DEBUG("R300_CMD_PACKET0: reg %04x, sz %d\n", reg, sz);
353         if (!sz)
354                 return 0;
355
356         if (sz * 4 > cmdbuf->bufsz)
357                 return -EINVAL;
358
359         if (reg + sz * 4 >= 0x10000) {
360                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
361                           sz);
362                 return -EINVAL;
363         }
364
365         if (r300_check_range(reg, sz)) {
366                 /* go and check everything */
367                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
368                                                            header);
369         }
370         /* the rest of the data is safe to emit, whatever the values the user passed */
371
372         BEGIN_RING(1 + sz);
373         OUT_RING(CP_PACKET0(reg, sz - 1));
374         OUT_RING_TABLE((int *)cmdbuf->buf, sz);
375         ADVANCE_RING();
376
377         cmdbuf->buf += sz * 4;
378         cmdbuf->bufsz -= sz * 4;
379
380         return 0;
381 }
382
383 /**
384  * Uploads user-supplied vertex program instructions or parameters onto
385  * the graphics card.
386  * Called by r300_do_cp_cmdbuf.
387  */
388 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
389                                     drm_radeon_kcmd_buffer_t *cmdbuf,
390                                     drm_r300_cmd_header_t header)
391 {
392         int sz;
393         int addr;
394         RING_LOCALS;
395
396         sz = header.vpu.count;
397         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
398
399         if (!sz)
400                 return 0;
401         if (sz * 16 > cmdbuf->bufsz)
402                 return -EINVAL;
403
404         /* VAP is very sensitive so we purge cache before we program it
405          * and we also flush its state before & after */
406         BEGIN_RING(6);
407         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
408         OUT_RING(R300_RB3D_DC_FLUSH);
409         OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
410         OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
411         OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
412         OUT_RING(0);
413         ADVANCE_RING();
414         /* set flush flag */
415         dev_priv->track_flush |= RADEON_FLUSH_EMITED;
416
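        /* The upload itself: set the PVS upload address once, then stream the
         * program through the data port.  CP_PACKET0_TABLE builds a type-0
         * packet with the one-register-write bit set, so all sz * 4 dwords go
         * to R300_VAP_PVS_UPLOAD_DATA rather than consecutive registers; each
         * `count' unit is one 4-dword vector, which is why sz * 16 bytes are
         * consumed from the command buffer below.
         */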
417         BEGIN_RING(3 + sz * 4);
418         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
419         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
420         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
421         ADVANCE_RING();
422
423         BEGIN_RING(2);
424         OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
425         OUT_RING(0);
426         ADVANCE_RING();
427
428         cmdbuf->buf += sz * 16;
429         cmdbuf->bufsz -= sz * 16;
430
431         return 0;
432 }
433
434 /**
435  * Emit a clear packet from userspace.
436  * Called by r300_emit_packet3.
437  */
438 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
439                                       drm_radeon_kcmd_buffer_t *cmdbuf)
440 {
441         RING_LOCALS;
442
443         if (8 * 4 > cmdbuf->bufsz)
444                 return -EINVAL;
445
446         BEGIN_RING(10);
447         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
448         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
449                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
450         OUT_RING_TABLE((int *)cmdbuf->buf, 8);
451         ADVANCE_RING();
452
453         BEGIN_RING(4);
454         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
455         OUT_RING(R300_RB3D_DC_FLUSH);
456         OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
457         OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
458         ADVANCE_RING();
459         /* set flush flag */
460         dev_priv->track_flush |= RADEON_FLUSH_EMITED;
461
462         cmdbuf->buf += 8 * 4;
463         cmdbuf->bufsz -= 8 * 4;
464
465         return 0;
466 }
467
468 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
469                                                drm_radeon_kcmd_buffer_t *cmdbuf,
470                                                u32 header)
471 {
472         int count, i, k;
473 #define MAX_ARRAY_PACKET  64
474         u32 payload[MAX_ARRAY_PACKET];
475         u32 narrays;
476         RING_LOCALS;
477
478         count = (header >> 16) & 0x3fff;
479
480         if ((count + 1) > MAX_ARRAY_PACKET) {
481                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
482                           count);
483                 return -EINVAL;
484         }
485         memset(payload, 0, MAX_ARRAY_PACKET * 4);
486         memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
487
488         /* carefully check packet contents */
489
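        /* Payload layout as parsed below: payload[0] is the number of vertex
         * arrays; the arrays then follow in pairs as one packed attribute
         * dword (describing up to two arrays) followed by one GPU offset
         * dword per array, with a final lone offset when narrays is odd.
         * Every offset must pass radeon_check_offset().
         */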
490         narrays = payload[0];
491         k = 0;
492         i = 1;
493         while ((k < narrays) && (i < (count + 1))) {
494                 i++;            /* skip attribute field */
495                 if (!radeon_check_offset(dev_priv, payload[i])) {
496                         DRM_ERROR
497                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
498                              k, i);
499                         return -EINVAL;
500                 }
501                 k++;
502                 i++;
503                 if (k == narrays)
504                         break;
505                 /* have one more to process, they come in pairs */
506                 if (!radeon_check_offset(dev_priv, payload[i])) {
507                         DRM_ERROR
508                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
509                              k, i);
510                         return -EINVAL;
511                 }
512                 k++;
513                 i++;
514         }
515         /* do the counts match what we expect ? */
516         if ((k != narrays) || (i != (count + 1))) {
517                 DRM_ERROR
518                     ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
519                      k, i, narrays, count + 1);
520                 return -EINVAL;
521         }
522
523         /* all clear, output packet */
524
525         BEGIN_RING(count + 2);
526         OUT_RING(header);
527         OUT_RING_TABLE(payload, count + 1);
528         ADVANCE_RING();
529
530         cmdbuf->buf += (count + 2) * 4;
531         cmdbuf->bufsz -= (count + 2) * 4;
532
533         return 0;
534 }
535
536 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
537                                              drm_radeon_kcmd_buffer_t *cmdbuf)
538 {
539         u32 *cmd = (u32 *) cmdbuf->buf;
540         int count, ret;
541         RING_LOCALS;
542
543         count = (cmd[0] >> 16) & 0x3fff;
544
545         if (cmd[0] & 0x8000) {
546                 u32 offset;
547
548                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
549                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
550                         offset = cmd[2] << 10;
551                         ret = !radeon_check_offset(dev_priv, offset);
552                         if (ret) {
553                                 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
554                                 return -EINVAL;
555                         }
556                 }
557
558                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
559                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
560                         offset = cmd[3] << 10;
561                         ret = !radeon_check_offset(dev_priv, offset);
562                         if (ret) {
563                                 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
564                                 return -EINVAL;
565                         }
566
567                 }
568         }
569
570         BEGIN_RING(count+2);
571         OUT_RING(cmd[0]);
572         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
573         ADVANCE_RING();
574
575         cmdbuf->buf += (count+2)*4;
576         cmdbuf->bufsz -= (count+2)*4;
577
578         return 0;
579 }
580
581 static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
582                                              drm_radeon_kcmd_buffer_t *cmdbuf)
583 {
584         u32 *cmd = (u32 *) cmdbuf->buf;
585         int count, ret;
586         RING_LOCALS;
587
588         count = (cmd[0] >> 16) & 0x3fff;
589
590         if ((cmd[1] & 0x8000ffff) != 0x80000810) {
591                 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
592                 return -EINVAL;
593         }
594         ret = !radeon_check_offset(dev_priv, cmd[2]);
595         if (ret) {
596                 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
597                 return -EINVAL;
598         }
599
600         BEGIN_RING(count+2);
601         OUT_RING(cmd[0]);
602         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
603         ADVANCE_RING();
604
605         cmdbuf->buf += (count+2)*4;
606         cmdbuf->bufsz -= (count+2)*4;
607
608         return 0;
609 }
610
611 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
612                                             drm_radeon_kcmd_buffer_t *cmdbuf)
613 {
614         u32 header;
615         int count;
616         RING_LOCALS;
617
618         if (4 > cmdbuf->bufsz)
619                 return -EINVAL;
620
621         /* Fixme !! This simply emits a packet without much checking.
622            We need to be smarter. */
623
624         /* obtain first word - actual packet3 header */
625         header = *(u32 *) cmdbuf->buf;
626
627         /* Is it packet 3 ? */
628         if ((header >> 30) != 0x3) {
629                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
630                 return -EINVAL;
631         }
632
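        /* Standard CP type-3 header layout: bits 31:30 = packet type (3),
         * bits 29:16 = count (number of payload dwords minus one), and
         * bits 15:8 = opcode, so the full packet is count + 2 dwords
         * including the header.
         */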
633         count = (header >> 16) & 0x3fff;
634
635         /* Check again now that we know how much data to expect */
636         if ((count + 2) * 4 > cmdbuf->bufsz) {
637                 DRM_ERROR
638                     ("Expected packet3 of length %d but have only %d bytes left\n",
639                      (count + 2) * 4, cmdbuf->bufsz);
640                 return -EINVAL;
641         }
642
643         /* Is it a packet type we know about ? */
644         switch (header & 0xff00) {
645         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
646                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
647
648         case RADEON_CNTL_BITBLT_MULTI:
649                 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
650
651         case RADEON_CP_INDX_BUFFER:
652                 /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
653                 return r300_emit_indx_buffer(dev_priv, cmdbuf);
654         case RADEON_CP_3D_DRAW_IMMD_2:
655                 /* triggers drawing using in-packet vertex data */
656         case RADEON_CP_3D_DRAW_VBUF_2:
657                 /* triggers drawing of vertex buffers setup elsewhere */
658         case RADEON_CP_3D_DRAW_INDX_2:
659                 /* triggers drawing using indices to vertex buffer */
660                 /* whenever we send vertex we clear flush & purge */
661                 dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
662                                            RADEON_PURGE_EMITED);
663                 break;
664         case RADEON_WAIT_FOR_IDLE:
665         case RADEON_CP_NOP:
666                 /* these packets are safe */
667                 break;
668         default:
669                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
670                 return -EINVAL;
671         }
672
673         BEGIN_RING(count + 2);
674         OUT_RING(header);
675         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
676         ADVANCE_RING();
677
678         cmdbuf->buf += (count + 2) * 4;
679         cmdbuf->bufsz -= (count + 2) * 4;
680
681         return 0;
682 }
683
684 /**
685  * Emit a rendering packet3 from userspace.
686  * Called by r300_do_cp_cmdbuf.
687  */
688 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
689                                         drm_radeon_kcmd_buffer_t *cmdbuf,
690                                         drm_r300_cmd_header_t header)
691 {
692         int n;
693         int ret;
694         char *orig_buf = cmdbuf->buf;
695         int orig_bufsz = cmdbuf->bufsz;
696
697         /* This is a do-while-loop so that we run the interior at least once,
698          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
699          */
700         n = 0;
701         do {
702                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
703                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
704                         if (ret)
705                                 return ret;
706
707                         cmdbuf->buf = orig_buf;
708                         cmdbuf->bufsz = orig_bufsz;
709                 }
710
711                 switch (header.packet3.packet) {
712                 case R300_CMD_PACKET3_CLEAR:
713                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
714                         ret = r300_emit_clear(dev_priv, cmdbuf);
715                         if (ret) {
716                                 DRM_ERROR("r300_emit_clear failed\n");
717                                 return ret;
718                         }
719                         break;
720
721                 case R300_CMD_PACKET3_RAW:
722                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
723                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
724                         if (ret) {
725                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
726                                 return ret;
727                         }
728                         break;
729
730                 default:
731                         DRM_ERROR("bad packet3 type %i at %p\n",
732                                   header.packet3.packet,
733                                   cmdbuf->buf - sizeof(header));
734                         return -EINVAL;
735                 }
736
737                 n += R300_SIMULTANEOUS_CLIPRECTS;
738         } while (n < cmdbuf->nbox);
739
740         return 0;
741 }
742
743 /* Some of the R300 chips seem to be extremely touchy about the two registers
744  * that are configured in r300_pacify.
745  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
746  * sends a command buffer that contains only state setting commands and a
747  * vertex program/parameter upload sequence, this will eventually lead to a
748  * lockup, unless the sequence is bracketed by calls to r300_pacify.
749  * So we should take great care to *always* call r300_pacify before
750  * *anything* 3D related, and again afterwards. This is what the
751  * call bracket in r300_do_cp_cmdbuf is for.
752  */
753
754 /**
755  * Emit the sequence to pacify R300.
756  */
757 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
758 {
759         RING_LOCALS;
760         uint32_t cache_z, cache_3d, cache_2d;
761         
762         cache_z = R300_ZC_FLUSH;
763         cache_2d = R300_RB2D_DC_FLUSH;
764         cache_3d = R300_RB3D_DC_FLUSH;
765         if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
766                 /* we can purge, primitives were drawn since the last purge */
767                 cache_z |= R300_ZC_FREE;
768                 cache_2d |= R300_RB2D_DC_FREE;
769                 cache_3d |= R300_RB3D_DC_FREE;
770         }
771
772         /* flush & purge zbuffer */
773         BEGIN_RING(2);
774         OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
775         OUT_RING(cache_z);
776         ADVANCE_RING();
777         /* flush & purge 3d */
778         BEGIN_RING(2);
779         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
780         OUT_RING(cache_3d);
781         ADVANCE_RING();
782         /* flush & purge texture */
783         BEGIN_RING(2);
784         OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
785         OUT_RING(0);
786         ADVANCE_RING();
787         /* FIXME: is this one really needed ? */
788         BEGIN_RING(2);
789         OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
790         OUT_RING(0);
791         ADVANCE_RING();
792         BEGIN_RING(2);
793         OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
794         OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
795         ADVANCE_RING();
796         /* flush & purge 2d through E2 as RB2D will trigger lockup */
797         BEGIN_RING(4);
798         OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
799         OUT_RING(cache_2d);
800         OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
801         OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
802                  RADEON_WAIT_HOST_IDLECLEAN);
803         ADVANCE_RING();
804         /* set flush & purge flags */
805         dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
806 }
807
808 /**
809  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
810  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
811  * be careful about how this function is called.
812  */
813 static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
814 {
815         drm_radeon_private_t *dev_priv = dev->dev_private;
816         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
817
818         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
819         buf->pending = 1;
820         buf->used = 0;
821 }
822
823 static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
824                           drm_r300_cmd_header_t header)
825 {
826         u32 wait_until;
827         RING_LOCALS;
828
829         if (!header.wait.flags)
830                 return;
831
832         wait_until = 0;
833
834         switch(header.wait.flags) {
835         case R300_WAIT_2D:
836                 wait_until = RADEON_WAIT_2D_IDLE;
837                 break;
838         case R300_WAIT_3D:
839                 wait_until = RADEON_WAIT_3D_IDLE;
840                 break;
841         case R300_NEW_WAIT_2D_3D:
842                 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE;
843                 break;
844         case R300_NEW_WAIT_2D_2D_CLEAN:
845                 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
846                 break;
847         case R300_NEW_WAIT_3D_3D_CLEAN:
848                 wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
849                 break;
850         case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
851                 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
852                 wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
853                 break;
854         default:
855                 return;
856         }
857
858         BEGIN_RING(2);
859         OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
860         OUT_RING(wait_until);
861         ADVANCE_RING();
862 }
863
864 static int r300_scratch(drm_radeon_private_t *dev_priv,
865                         drm_radeon_kcmd_buffer_t *cmdbuf,
866                         drm_r300_cmd_header_t header)
867 {
868         u32 *ref_age_base;
869         u32 i, buf_idx, h_pending;
870         RING_LOCALS;
871
872         if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) ) {
873                 return -EINVAL;
874         }
875
876         if (header.scratch.reg >= 5) {
877                 return -EINVAL;
878         }
879
880         dev_priv->scratch_ages[header.scratch.reg] ++;
881
882         ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
883
884         cmdbuf->buf += sizeof(uint64_t);
885         cmdbuf->bufsz -= sizeof(uint64_t);
886
887         for (i=0; i < header.scratch.n_bufs; i++) {
888                 buf_idx = *(u32 *)cmdbuf->buf;
889                 buf_idx *= 2; /* 8 bytes per buf */
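                /* The user-supplied array at ref_age_base holds two u32s per
                 * buffer: element [2 * idx] receives the age the kernel just
                 * scheduled for that buffer and element [2 * idx + 1] is a
                 * pending-use count that is read back and decremented here;
                 * finding it already zero is treated as a userspace error.
                 */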
890
891                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
892                         return -EINVAL;
893                 }
894
895                 if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
896                         return -EINVAL;
897                 }
898
899                 if (h_pending == 0) {
900                         return -EINVAL;
901                 }
902
903                 h_pending--;
904
905                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
906                         return -EINVAL;
907                 }
908
909                 cmdbuf->buf += sizeof(buf_idx);
910                 cmdbuf->bufsz -= sizeof(buf_idx);
911         }
912
913         BEGIN_RING(2);
914         OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
915         OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
916         ADVANCE_RING();
917
918         return 0;
919 }
920
921 /**
922  * Uploads user-supplied vertex program instructions or parameters onto
923  * the graphics card.
924  * Called by r300_do_cp_cmdbuf.
925  */
926 static __inline__ int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
927                                        drm_radeon_kcmd_buffer_t *cmdbuf,
928                                        drm_r300_cmd_header_t header)
929 {
930         int sz;
931         int addr;
932         int type;
933         int clamp;
934         int stride;
935         RING_LOCALS;
936
937         sz = header.r500fp.count;
938         /* address is 9 bits (0-8); bit 0 of adrhi_flags supplies address bit 8 */
939         addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
940
941         type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
942         clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
943
944         addr |= (type << 16);
945         addr |= (clamp << 17);
946
947         stride = type ? 4 : 6;
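        /* With the CONSTANT_TYPE flag set the data is presumably constants at
         * 4 dwords per entry, otherwise instructions at 6 dwords per entry,
         * so `count' is in those units: sz * stride dwords are emitted to the
         * ring and sz * stride * 4 bytes consumed from the command buffer.
         */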
948
949         DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
950         if (!sz)
951                 return 0;
952         if (sz * stride * 4 > cmdbuf->bufsz)
953                 return -EINVAL;
954
955         BEGIN_RING(3 + sz * stride);
956         OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
957         OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
958         OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride);
959
960         ADVANCE_RING();
961
962         cmdbuf->buf += sz * stride * 4;
963         cmdbuf->bufsz -= sz * stride * 4;
964
965         return 0;
966 }
967
968
969 /**
970  * Parses and validates a user-supplied command buffer and emits appropriate
971  * commands on the DMA ring buffer.
972  * Called by the ioctl handler function radeon_cp_cmdbuf.
973  */
974 int r300_do_cp_cmdbuf(struct drm_device *dev,
975                       struct drm_file *file_priv,
976                       drm_radeon_kcmd_buffer_t *cmdbuf)
977 {
978         drm_radeon_private_t *dev_priv = dev->dev_private;
979         struct drm_device_dma *dma = dev->dma;
980         struct drm_buf *buf = NULL;
981         int emit_dispatch_age = 0;
982         int ret = 0;
983         RING_LOCALS;
984
985         DRM_DEBUG("\n");
986
987         /* pacify */
988         r300_pacify(dev_priv);
989
990         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
991                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
992                 if (ret)
993                         goto cleanup;
994         }
995
996         while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
997                 int idx;
998                 drm_r300_cmd_header_t header;
999
1000                 header.u = *(unsigned int *)cmdbuf->buf;
1001
1002                 cmdbuf->buf += sizeof(header);
1003                 cmdbuf->bufsz -= sizeof(header);
1004
1005                 switch (header.header.cmd_type) {
1006                 case R300_CMD_PACKET0:
1007                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
1008                         if (ret) {
1009                                 DRM_ERROR("r300_emit_packet0 failed\n");
1010                                 goto cleanup;
1011                         }
1012                         break;
1013
1014                 case R300_CMD_VPU:
1015                         DRM_DEBUG("R300_CMD_VPU\n");
1016                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
1017                         if (ret) {
1018                                 DRM_ERROR("r300_emit_vpu failed\n");
1019                                 goto cleanup;
1020                         }
1021                         break;
1022
1023                 case R300_CMD_PACKET3:
1024                         DRM_DEBUG("R300_CMD_PACKET3\n");
1025                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
1026                         if (ret) {
1027                                 DRM_ERROR("r300_emit_packet3 failed\n");
1028                                 goto cleanup;
1029                         }
1030                         break;
1031
1032                 case R300_CMD_END3D:
1033                         DRM_DEBUG("R300_CMD_END3D\n");
1034                         /* TODO:
1035                            Ideally the userspace driver should not need to issue this call,
1036                            i.e. the drm driver should issue it automatically and prevent
1037                            lockups.
1038
1039                            In practice, we do not understand why this call is needed or what
1040                            it does (beyond some vague guesses that it has to do with cache
1041                            coherence), and so the userspace driver issues it.
1042
1043                            Once we are sure which uses of it prevent lockups, the code could
1044                            be moved into the kernel and the userspace driver would no longer
1045                            need to use this command.
1046
1047                            Note that issuing this command does not hurt anything
1048                            except, possibly, performance. */
1049                         r300_pacify(dev_priv);
1050                         break;
1051
1052                 case R300_CMD_CP_DELAY:
1053                         /* simple enough, we can do it here */
1054                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
1055                         {
1056                                 int i;
1057                                 RING_LOCALS;
1058
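                                /* RADEON_CP_PACKET2 is a CP no-op, so this
                                 * simply pads the ring with `count' filler
                                 * dwords to introduce a small delay.
                                 */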
1059                                 BEGIN_RING(header.delay.count);
1060                                 for (i = 0; i < header.delay.count; i++)
1061                                         OUT_RING(RADEON_CP_PACKET2);
1062                                 ADVANCE_RING();
1063                         }
1064                         break;
1065
1066                 case R300_CMD_DMA_DISCARD:
1067                         DRM_DEBUG("R300_CMD_DMA_DISCARD\n");
1068                         idx = header.dma.buf_idx;
1069                         if (idx < 0 || idx >= dma->buf_count) {
1070                                 DRM_ERROR("buffer index %d (of %d max)\n",
1071                                           idx, dma->buf_count - 1);
1072                                 ret = -EINVAL;
1073                                 goto cleanup;
1074                         }
1075
1076                         buf = dma->buflist[idx];
1077                         if (buf->file_priv != file_priv || buf->pending) {
1078                                 DRM_ERROR("bad buffer %p %p %d\n",
1079                                           buf->file_priv, file_priv,
1080                                           buf->pending);
1081                                 ret = -EINVAL;
1082                                 goto cleanup;
1083                         }
1084
1085                         emit_dispatch_age = 1;
1086                         r300_discard_buffer(dev, buf);
1087                         break;
1088
1089                 case R300_CMD_WAIT:
1090                         DRM_DEBUG("R300_CMD_WAIT\n");
1091                         r300_cmd_wait(dev_priv, header);
1092                         break;
1093
1094                 case R300_CMD_SCRATCH:
1095                         DRM_DEBUG("R300_CMD_SCRATCH\n");
1096                         ret = r300_scratch(dev_priv, cmdbuf, header);
1097                         if (ret) {
1098                                 DRM_ERROR("r300_scratch failed\n");
1099                                 goto cleanup;
1100                         }
1101                         break;
1102
1103                 case R300_CMD_R500FP:
1104                         if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
1105                                 DRM_ERROR("Calling r500 command on r300 card\n");
1106                                 ret = -EINVAL;
1107                                 goto cleanup;
1108                         }
1109                         DRM_DEBUG("R300_CMD_R500FP\n");
1110                         ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
1111                         if (ret) {
1112                                 DRM_ERROR("r300_emit_r500fp failed\n");
1113                                 goto cleanup;
1114                         }
1115                         break;
1116                 default:
1117                         DRM_ERROR("bad cmd_type %i at %p\n",
1118                                   header.header.cmd_type,
1119                                   cmdbuf->buf - sizeof(header));
1120                         ret = -EINVAL;
1121                         goto cleanup;
1122                 }
1123         }
1124
1125         DRM_DEBUG("END\n");
1126
1127       cleanup:
1128         r300_pacify(dev_priv);
1129
1130         /* We emit the vertex buffer age here, outside the pacifier "brackets"
1131          * for two reasons:
1132          *  (1) This may coalesce multiple age emissions into a single one and
1133          *  (2) more importantly, some chips lock up hard when scratch registers
1134          *      are written inside the pacifier bracket.
1135          */
1136         if (emit_dispatch_age) {
1137                 RING_LOCALS;
1138
1139                 /* Emit the vertex buffer age */
1140                 BEGIN_RING(2);
1141                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
1142                 ADVANCE_RING();
1143         }
1144
1145         COMMIT_RING();
1146
1147         return ret;
1148 }
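
/* Command buffer wire format, as parsed above (a sketch for reference; the
 * authoritative definitions live in radeon_drm.h):
 *
 *   The buffer is a sequence of records, each introduced by a one-dword
 *   drm_r300_cmd_header_t whose cmd_type byte selects the record kind:
 *
 *     R300_CMD_PACKET0      header + count value dwords (register writes)
 *     R300_CMD_VPU          header + count * 4 dwords (vertex program upload)
 *     R300_CMD_PACKET3      header + a clear payload or a raw packet3
 *     R300_CMD_END3D        header only (triggers r300_pacify)
 *     R300_CMD_CP_DELAY     header only (emits count CP no-ops)
 *     R300_CMD_DMA_DISCARD  header only (ages and releases a DMA buffer)
 *     R300_CMD_WAIT         header only (WAIT_UNTIL with the requested flags)
 *     R300_CMD_SCRATCH      header + u64 user pointer + n_bufs buffer indices
 *     R300_CMD_R500FP       header + count * stride dwords (RV515 and later)
 *
 *   Parsing stops once fewer than sizeof(header) bytes remain.
 */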