OSDN Git Service

nouveau: fix bo mapping issue
[android-x86/external-libdrm.git] / shared-core / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     struct drm_file *file_priv,
43                                                     u32 * offset)
44 {
45         u64 off = *offset;
46         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
47         struct drm_radeon_driver_file_fields *radeon_priv;
48
49         /* Hrm ... the story of the offset ... So this function converts
50          * the various ideas of what userland clients might have for an
51          * offset in the card address space into an offset into the card
52          * address space :) So with a sane client, it should just keep
53          * the value intact and just do some boundary checking. However,
54          * not all clients are sane. Some older clients pass us 0 based
55          * offsets relative to the start of the framebuffer and some may
56          * assume the AGP aperture it appended to the framebuffer, so we
57          * try to detect those cases and fix them up.
58          *
59          * Note: It might be a good idea here to make sure the offset lands
60          * in some "allowed" area to protect things like the PCIE GART...
61          */
62
63         /* First, the best case, the offset already lands in either the
64          * framebuffer or the GART mapped space
65          */
66         if (radeon_check_offset(dev_priv, off))
67                 return 0;
68
69         /* Ok, that didn't happen... now check if we have a zero based
70          * offset that fits in the framebuffer + gart space, apply the
71          * magic offset we get from SETPARAM or calculated from fb_location
72          */
73         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
74                 radeon_priv = file_priv->driver_priv;
75                 off += radeon_priv->radeon_fb_delta;
76         }
77
78         /* Finally, assume we aimed at a GART offset if beyond the fb */
79         if (off > fb_end)
80                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
81
82         /* Now recheck and fail if out of bounds */
83         if (radeon_check_offset(dev_priv, off)) {
84                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
85                 *offset = off;
86                 return 0;
87         }
88         return -EINVAL;
89 }
90
/* Validate (and fix up) any memory offsets embedded in a type-0 state
 * packet before it is emitted to the CP ring.  @id selects which dwords
 * of @data carry card-space offsets; those dwords are run through
 * radeon_check_and_fixup_offset() in place.  Packets known to carry no
 * offsets are accepted as-is; unknown ids are rejected.
 *
 * Returns 0 on success, -EINVAL on a bad offset or unknown packet id.
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		/* The PP_MISC register range includes RB3D_DEPTHOFFSET;
		 * index to that dword within the packet payload.
		 */
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		/* The PP_CNTL range includes RB3D_COLOROFFSET. */
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		/* R200 texture offset packets: the offset is the first
		 * (and only) payload dword.
		 */
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		/* R100 TXFILTER ranges include the PP_TXOFFSET register. */
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return -EINVAL;
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			/* Five cubic-face offsets per packet, each one a
			 * separate card-space address to validate.
			 */
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  file_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return -EINVAL;
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			/* Same layout as the R200 cubic packets: five
			 * offsets to validate.
			 */
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  file_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return -EINVAL;
				}
			}
		}
		break;

	case R200_EMIT_VAP_CTL: {
			/* No offsets to check, but a TCL state flush is
			 * emitted to the ring before VAP_CTL is written.
			 */
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return -EINVAL;
	}

	return 0;
}
264
/* Validate a client-submitted type-3 CP packet before it is emitted.
 * Computes the packet's total dword size into *cmdsz from the count
 * field in the header, verifies the packet fits inside the supplied
 * buffer, and then checks the opcode-specific payload: chip-family
 * restrictions and any embedded memory offsets (which are fixed up in
 * place via radeon_check_and_fixup_offset()).
 *
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;
	u32 offset, narrays;
	int count, i, k;

	/* Total size = header dword + count field + 1 payload dword. */
	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return -EINVAL;
	}

	/* Reject packets that claim to extend past the user buffer. */
	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return -EINVAL;
	}

	switch(cmd[0] & 0xff00) {
	/* XXX Are there old drivers needing other packets? */

	case RADEON_3D_DRAW_IMMD:
	case RADEON_3D_DRAW_VBUF:
	case RADEON_3D_DRAW_INDX:
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
	case RADEON_3D_CLEAR_ZMASK:
/*      case RADEON_CP_NEXT_CHAR:
	case RADEON_CP_PLY_NEXTSCAN:
	case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
		/* these packets are safe */
		break;

	case RADEON_CP_3D_DRAW_IMMD_2:
	case RADEON_CP_3D_DRAW_VBUF_2:
	case RADEON_CP_3D_DRAW_INDX_2:
	case RADEON_3D_CLEAR_HIZ:
		/* safe but r200 only */
		if ((dev_priv->chip_family < CHIP_R200) ||
		    (dev_priv->chip_family > CHIP_RV280)) {
			DRM_ERROR("Invalid 3d packet for non r200-class chip\n");
			return -EINVAL;
		}
		break;

	case RADEON_3D_LOAD_VBPNTR:
		count = (cmd[0] >> 16) & 0x3fff;

		if (count > 18) { /* 12 arrays max */
			DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
				  count);
			return -EINVAL;
		}

		/* carefully check packet contents: arrays come in pairs,
		 * each pair described by one attribute dword followed by
		 * one offset dword per array.  Walk the payload and fix
		 * up every offset.
		 */
		narrays = cmd[1] & ~0xc000;
		k = 0;
		i = 2;
		while ((k < narrays) && (i < (count + 2))) {
			i++;		/* skip attribute field */
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  &cmd[i])) {
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
			if (k == narrays)
				break;
			/* have one more to process, they come in pairs */
			if (radeon_check_and_fixup_offset(dev_priv,
							  file_priv, &cmd[i]))
			{
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
		}
		/* do the counts match what we expect ? */
		/* NOTE(review): the check compares i against count + 2 but
		 * the message prints count + 1 under the label "count+1" —
		 * historical inconsistency, left as-is.
		 */
		if ((k != narrays) || (i != (count + 2))) {
			DRM_ERROR
			    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
			      k, i, narrays, count + 1);
			return -EINVAL;
		}
		break;

	case RADEON_3D_RNDR_GEN_INDX_PRIM:
		/* r100-class chips only; the offset lives in dword 1 */
		if (dev_priv->chip_family > CHIP_RS200) {
			DRM_ERROR("Invalid 3d packet for non-r100-class chip\n");
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
				DRM_ERROR("Invalid rndr_gen_indx offset\n");
				return -EINVAL;
		}
		break;

	case RADEON_CP_INDX_BUFFER:
		/* safe but r200 only */
		if ((dev_priv->chip_family < CHIP_R200) ||
		    (dev_priv->chip_family > CHIP_RV280)) {
			DRM_ERROR("Invalid 3d packet for non-r200-class chip\n");
			return -EINVAL;
		}
		/* dword 1 must encode the expected register address */
		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
			return -EINVAL;
		}
		break;

	case RADEON_CNTL_HOSTDATA_BLT:
	case RADEON_CNTL_PAINT_MULTI:
	case RADEON_CNTL_BITBLT_MULTI:
		/* MSB of opcode: next DWORD GUI_CNTL */
		/* Blit offsets are stored shifted right by 10 bits in the
		 * low dword bits; expand, validate/fix up, and repack.
		 */
		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return -EINVAL;
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		/* When both src and dst carry pitch/offset, a second
		 * offset dword follows and needs the same treatment.
		 */
		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return -EINVAL;
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
		break;

	default:
		DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
		return -EINVAL;
	}

	return 0;
}
424
425 /* ================================================================
426  * CP hardware state programming functions
427  */
428
/* Emit a clip rectangle to the CP ring via the RE_TOP_LEFT and
 * RE_WIDTH_HEIGHT registers.  Coordinates are packed y in the high
 * halfword, x in the low.  The -1 on x2/y2 suggests the hardware wants
 * the last covered pixel while drm_clip_rect x2/y2 are exclusive —
 * NOTE(review): presumed from the math, confirm against the register
 * spec.
 */
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     struct drm_clip_rect * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}
444
445 /* Emit 1.1 state
446  */
447 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
448                              struct drm_file *file_priv,
449                              drm_radeon_context_regs_t * ctx,
450                              drm_radeon_texture_regs_t * tex,
451                              unsigned int dirty)
452 {
453         RING_LOCALS;
454         DRM_DEBUG("dirty=0x%08x\n", dirty);
455
456         if (dirty & RADEON_UPLOAD_CONTEXT) {
457                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
458                                                   &ctx->rb3d_depthoffset)) {
459                         DRM_ERROR("Invalid depth buffer offset\n");
460                         return -EINVAL;
461                 }
462
463                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
464                                                   &ctx->rb3d_coloroffset)) {
465                         DRM_ERROR("Invalid depth buffer offset\n");
466                         return -EINVAL;
467                 }
468
469                 BEGIN_RING(14);
470                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
471                 OUT_RING(ctx->pp_misc);
472                 OUT_RING(ctx->pp_fog_color);
473                 OUT_RING(ctx->re_solid_color);
474                 OUT_RING(ctx->rb3d_blendcntl);
475                 OUT_RING(ctx->rb3d_depthoffset);
476                 OUT_RING(ctx->rb3d_depthpitch);
477                 OUT_RING(ctx->rb3d_zstencilcntl);
478                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
479                 OUT_RING(ctx->pp_cntl);
480                 OUT_RING(ctx->rb3d_cntl);
481                 OUT_RING(ctx->rb3d_coloroffset);
482                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
483                 OUT_RING(ctx->rb3d_colorpitch);
484                 ADVANCE_RING();
485         }
486
487         if (dirty & RADEON_UPLOAD_VERTFMT) {
488                 BEGIN_RING(2);
489                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
490                 OUT_RING(ctx->se_coord_fmt);
491                 ADVANCE_RING();
492         }
493
494         if (dirty & RADEON_UPLOAD_LINE) {
495                 BEGIN_RING(5);
496                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
497                 OUT_RING(ctx->re_line_pattern);
498                 OUT_RING(ctx->re_line_state);
499                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
500                 OUT_RING(ctx->se_line_width);
501                 ADVANCE_RING();
502         }
503
504         if (dirty & RADEON_UPLOAD_BUMPMAP) {
505                 BEGIN_RING(5);
506                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
507                 OUT_RING(ctx->pp_lum_matrix);
508                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
509                 OUT_RING(ctx->pp_rot_matrix_0);
510                 OUT_RING(ctx->pp_rot_matrix_1);
511                 ADVANCE_RING();
512         }
513
514         if (dirty & RADEON_UPLOAD_MASKS) {
515                 BEGIN_RING(4);
516                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
517                 OUT_RING(ctx->rb3d_stencilrefmask);
518                 OUT_RING(ctx->rb3d_ropcntl);
519                 OUT_RING(ctx->rb3d_planemask);
520                 ADVANCE_RING();
521         }
522
523         if (dirty & RADEON_UPLOAD_VIEWPORT) {
524                 BEGIN_RING(7);
525                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
526                 OUT_RING(ctx->se_vport_xscale);
527                 OUT_RING(ctx->se_vport_xoffset);
528                 OUT_RING(ctx->se_vport_yscale);
529                 OUT_RING(ctx->se_vport_yoffset);
530                 OUT_RING(ctx->se_vport_zscale);
531                 OUT_RING(ctx->se_vport_zoffset);
532                 ADVANCE_RING();
533         }
534
535         if (dirty & RADEON_UPLOAD_SETUP) {
536                 BEGIN_RING(4);
537                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
538                 OUT_RING(ctx->se_cntl);
539                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
540                 OUT_RING(ctx->se_cntl_status);
541                 ADVANCE_RING();
542         }
543
544         if (dirty & RADEON_UPLOAD_MISC) {
545                 BEGIN_RING(2);
546                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
547                 OUT_RING(ctx->re_misc);
548                 ADVANCE_RING();
549         }
550
551         if (dirty & RADEON_UPLOAD_TEX0) {
552                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
553                                                   &tex[0].pp_txoffset)) {
554                         DRM_ERROR("Invalid texture offset for unit 0\n");
555                         return -EINVAL;
556                 }
557
558                 BEGIN_RING(9);
559                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
560                 OUT_RING(tex[0].pp_txfilter);
561                 OUT_RING(tex[0].pp_txformat);
562                 OUT_RING(tex[0].pp_txoffset);
563                 OUT_RING(tex[0].pp_txcblend);
564                 OUT_RING(tex[0].pp_txablend);
565                 OUT_RING(tex[0].pp_tfactor);
566                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
567                 OUT_RING(tex[0].pp_border_color);
568                 ADVANCE_RING();
569         }
570
571         if (dirty & RADEON_UPLOAD_TEX1) {
572                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
573                                                   &tex[1].pp_txoffset)) {
574                         DRM_ERROR("Invalid texture offset for unit 1\n");
575                         return -EINVAL;
576                 }
577
578                 BEGIN_RING(9);
579                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
580                 OUT_RING(tex[1].pp_txfilter);
581                 OUT_RING(tex[1].pp_txformat);
582                 OUT_RING(tex[1].pp_txoffset);
583                 OUT_RING(tex[1].pp_txcblend);
584                 OUT_RING(tex[1].pp_txablend);
585                 OUT_RING(tex[1].pp_tfactor);
586                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
587                 OUT_RING(tex[1].pp_border_color);
588                 ADVANCE_RING();
589         }
590
591         if (dirty & RADEON_UPLOAD_TEX2) {
592                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
593                                                   &tex[2].pp_txoffset)) {
594                         DRM_ERROR("Invalid texture offset for unit 2\n");
595                         return -EINVAL;
596                 }
597
598                 BEGIN_RING(9);
599                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
600                 OUT_RING(tex[2].pp_txfilter);
601                 OUT_RING(tex[2].pp_txformat);
602                 OUT_RING(tex[2].pp_txoffset);
603                 OUT_RING(tex[2].pp_txcblend);
604                 OUT_RING(tex[2].pp_txablend);
605                 OUT_RING(tex[2].pp_tfactor);
606                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
607                 OUT_RING(tex[2].pp_border_color);
608                 ADVANCE_RING();
609         }
610
611         return 0;
612 }
613
614 /* Emit 1.2 state
615  */
/* Emit 1.2 state
 *
 * Handles the 1.2-only ZBIAS registers, then delegates the remaining
 * (1.1-compatible) context and texture state to radeon_emit_state().
 * Returns whatever radeon_emit_state() returns (0 or -EINVAL).
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      struct drm_file *file_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, file_priv, &state->context,
				 state->tex, state->dirty);
}
633
634 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
635  * 1.3 cmdbuffers allow all previous state to be updated as well as
636  * the tcl scalar and vector areas.
637  */
638 static struct {
639         int start;
640         int len;
641         const char *name;
642 } packet[RADEON_MAX_STATE_PACKETS] = {
643         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
644         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
645         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
646         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
647         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
648         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
649         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
650         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
651         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
652         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
653         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
654         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
655         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
656         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
657         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
658         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
659         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
660         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
661         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
662         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
663         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
664                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
665         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
666         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
667         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
668         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
669         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
670         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
671         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
672         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
673         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
674         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
675         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
676         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
677         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
678         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
679         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
680         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
681         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
682         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
683         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
684         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
685         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
686         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
687         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
688         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
689         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
690         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
691         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
692         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
693         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
694          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
695         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
696         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
697         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
698         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
699         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
700         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
701         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
702         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
703         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
704         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
705         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
706                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
707         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
708         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
709         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
710         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
711         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
712         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
713         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
714         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
715         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
716         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
717         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
718         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
719         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
720         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
721         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
722         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
723         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
724         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
725         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
726         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
727         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
728         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
729         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
730         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
731         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
732         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
733         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
734         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
735         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
736         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
737         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
738         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
739         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
740         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
741 };
742
743 /* ================================================================
744  * Performance monitoring functions
745  */
746
747 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
748                              int x, int y, int w, int h, int r, int g, int b)
749 {
750         u32 color;
751         RING_LOCALS;
752
753         x += dev_priv->sarea_priv->boxes[0].x1;
754         y += dev_priv->sarea_priv->boxes[0].y1;
755
756         switch (dev_priv->color_fmt) {
757         case RADEON_COLOR_FORMAT_RGB565:
758                 color = (((r & 0xf8) << 8) |
759                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
760                 break;
761         case RADEON_COLOR_FORMAT_ARGB8888:
762         default:
763                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
764                 break;
765         }
766
767         BEGIN_RING(4);
768         RADEON_WAIT_UNTIL_3D_IDLE();
769         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
770         OUT_RING(0xffffffff);
771         ADVANCE_RING();
772
773         BEGIN_RING(6);
774
775         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
776         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
777                  RADEON_GMC_BRUSH_SOLID_COLOR |
778                  (dev_priv->color_fmt << 8) |
779                  RADEON_GMC_SRC_DATATYPE_COLOR |
780                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
781
782         if (dev_priv->sarea_priv->pfCurrentPage == 1) {
783                 OUT_RING(dev_priv->front_pitch_offset);
784         } else {
785                 OUT_RING(dev_priv->back_pitch_offset);
786         }
787
788         OUT_RING(color);
789
790         OUT_RING((x << 16) | y);
791         OUT_RING((w << 16) | h);
792
793         ADVANCE_RING();
794 }
795
796 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
797 {
798         /* Collapse various things into a wait flag -- trying to
799          * guess if userspase slept -- better just to have them tell us.
800          */
801         if (dev_priv->stats.last_frame_reads > 1 ||
802             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
803                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
804         }
805
806         if (dev_priv->stats.freelist_loops) {
807                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
808         }
809
810         /* Purple box for page flipping
811          */
812         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
813                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
814
815         /* Red box if we have to wait for idle at any point
816          */
817         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
818                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
819
820         /* Blue box: lost context?
821          */
822
823         /* Yellow box for texture swaps
824          */
825         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
826                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
827
828         /* Green box if hardware never idles (as far as we can tell)
829          */
830         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
831                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
832
833         /* Draw bars indicating number of buffers allocated
834          * (not a great measure, easily confused)
835          */
836         if (dev_priv->stats.requested_bufs) {
837                 if (dev_priv->stats.requested_bufs > 100)
838                         dev_priv->stats.requested_bufs = 100;
839
840                 radeon_clear_box(dev_priv, 4, 16,
841                                  dev_priv->stats.requested_bufs, 4,
842                                  196, 128, 128);
843         }
844
845         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
846
847 }
848
849 /* ================================================================
850  * CP command dispatch functions
851  */
852
/* Dispatch a clear of the front/back color buffers and/or the
 * depth/stencil buffer, clipped to the cliprects stored in the SAREA.
 *
 * dev:         DRM device; dev->dev_private holds the radeon state.
 * clear:       userspace-supplied clear parameters (flags, color mask,
 *              clear color, depth value, depth/stencil mask).
 * depth_boxes: per-cliprect quad coordinates (raw 32-bit values) used
 *              when depth/stencil is cleared by rendering quads.
 *
 * Color buffers are cleared with 2D PAINT_MULTI blits.  Depth/stencil
 * is cleared either via the hyper-z fast-clear packets (when
 * RADEON_CLEAR_FASTZ is requested) or by drawing rectangles through
 * the 3D pipe, with separate register setups for R200-class and
 * pre-R200 chips.
 */
static void radeon_cp_dispatch_clear(struct drm_device * dev,
                                     drm_radeon_clear_t * clear,
                                     drm_radeon_clear_rect_t * depth_boxes)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
        int nbox = sarea_priv->nbox;
        struct drm_clip_rect *pbox = sarea_priv->boxes;
        unsigned int flags = clear->flags;
        u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
        int i;
        RING_LOCALS;
        DRM_DEBUG("flags = 0x%x\n", flags);

        dev_priv->stats.clears++;

        /* If the displayed page is currently the "back" buffer, swap
         * the FRONT/BACK flags so we clear the surfaces the client
         * actually means.
         */
        if (dev_priv->sarea_priv->pfCurrentPage == 1) {
                unsigned int tmp = flags;

                flags &= ~(RADEON_FRONT | RADEON_BACK);
                if (tmp & RADEON_FRONT)
                        flags |= RADEON_BACK;
                if (tmp & RADEON_BACK)
                        flags |= RADEON_FRONT;
        }

        if (flags & (RADEON_FRONT | RADEON_BACK)) {

                BEGIN_RING(4);

                /* Ensure the 3D stream is idle before doing a
                 * 2D fill to clear the front or back buffer.
                 */
                RADEON_WAIT_UNTIL_3D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
                OUT_RING(clear->color_mask);

                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* One solid-color 2D fill per cliprect, per requested
                 * buffer.
                 */
                for (i = 0; i < nbox; i++) {
                        int x = pbox[i].x1;
                        int y = pbox[i].y1;
                        int w = pbox[i].x2 - x;
                        int h = pbox[i].y2 - y;

                        DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
                                  x, y, w, h, flags);

                        if (flags & RADEON_FRONT) {
                                BEGIN_RING(6);

                                OUT_RING(CP_PACKET3
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         (dev_priv->
                                          color_fmt << 8) |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_ROP3_P |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->front_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                                ADVANCE_RING();
                        }

                        if (flags & RADEON_BACK) {
                                BEGIN_RING(6);

                                OUT_RING(CP_PACKET3
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         (dev_priv->
                                          color_fmt << 8) |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_ROP3_P |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->back_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                                ADVANCE_RING();
                        }
                }
        }

        /* hyper z clear */
        /* no docs available, based on reverse engineering by Stephane Marchesin */
        if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
            && (flags & RADEON_CLEAR_FASTZ)) {

                /* NOTE(review): this `i` shadows the outer `i`; both are
                 * plain loop counters so no harm, but worth cleaning up.
                 */
                int i;
                /* z-pixels per line: 16-bit Z packs two per dword,
                 * otherwise four bytes each.
                 */
                int depthpixperline =
                    dev_priv->depth_fmt ==
                    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
                                                       2) : (dev_priv->
                                                             depth_pitch / 4);

                u32 clearmask;

                u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
                    ((clear->depth_mask & 0xff) << 24);

                /* Make sure we restore the 3D state next time.
                 * we haven't touched any "normal" state - still need this?
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                if ((dev_priv->flags & RADEON_HAS_HIERZ)
                    && (flags & RADEON_USE_HIERZ)) {
                        /* FIXME : reverse engineer that for Rx00 cards */
                        /* FIXME : the mask supposedly contains low-res z values. So can't set
                           just to the max (0xff? or actually 0x3fff?), need to take z clear
                           value into account? */
                        /* pattern seems to work for r100, though get slight
                           rendering errors with glxgears. If hierz is not enabled for r100,
                           only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
                           other ones are ignored, and the same clear mask can be used. That's
                           very different behaviour than R200 which needs different clear mask
                           and different number of tiles to clear if hierz is enabled or not !?!
                         */
                        clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
                } else {
                        /* clear mask : chooses the clearing pattern.
                           rv250: could be used to clear only parts of macrotiles
                           (but that would get really complicated...)?
                           bit 0 and 1 (either or both of them ?!?!) are used to
                           not clear tile (or maybe one of the bits indicates if the tile is
                           compressed or not), bit 2 and 3 to not clear tile 1,...,.
                           Pattern is as follows:
                           | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
                           bits -------------------------------------------------
                           | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
                           rv100: clearmask covers 2x8 4x1 tiles, but one clear still
                           covers 256 pixels ?!?
                         */
                        clearmask = 0x0;
                }

                /* Program the fast-clear value and flush the z-cache
                 * before emitting the CLEAR_ZMASK packets.
                 */
                BEGIN_RING(8);
                RADEON_WAIT_UNTIL_2D_IDLE();
                OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
                             tempRB3D_DEPTHCLEARVALUE);
                /* what offset is this exactly ? */
                OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
                /* need ctlstat, otherwise get some strange black flickering */
                OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
                             RADEON_RB3D_ZC_FLUSH_ALL);
                ADVANCE_RING();

                /* Emit one run of CLEAR_ZMASK packets per cliprect;
                 * tile geometry differs per chip family (see branches).
                 */
                for (i = 0; i < nbox; i++) {
                        int tileoffset, nrtilesx, nrtilesy, j;
                        /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
                        if ((dev_priv->flags & RADEON_HAS_HIERZ)
                            && (dev_priv->chip_family < CHIP_R200)) {
                                /* FIXME : figure this out for r200 (when hierz is enabled). Or
                                   maybe r200 actually doesn't need to put the low-res z value into
                                   the tile cache like r100, but just needs to clear the hi-level z-buffer?
                                   Works for R100, both with hierz and without.
                                   R100 seems to operate on 2x1 8x8 tiles, but...
                                   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
                                   problematic with resolutions which are not 64 pix aligned? */
                                tileoffset =
                                    ((pbox[i].y1 >> 3) * depthpixperline +
                                     pbox[i].x1) >> 6;
                                nrtilesx =
                                    ((pbox[i].x2 & ~63) -
                                     (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy =
                                    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        /* first tile */
                                        OUT_RING(tileoffset * 8);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 4);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        } else if ((dev_priv->chip_family >= CHIP_R200) &&
                                   (dev_priv->chip_family <= CHIP_RV280)) {
                                /* works for rv250. */
                                /* find first macro tile (8x2 4x4 z-pixels on rv250) */
                                tileoffset =
                                    ((pbox[i].y1 >> 3) * depthpixperline +
                                     pbox[i].x1) >> 5;
                                nrtilesx =
                                    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
                                nrtilesy =
                                    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        /* first tile */
                                        /* judging by the first tile offset needed, could possibly
                                           directly address/clear 4x4 tiles instead of 8x2 * 4x4
                                           macro tiles, though would still need clear mask for
                                           right/bottom if truly 4x4 granularity is desired ? */
                                        OUT_RING(tileoffset * 16);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 1);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 5;
                                }
                        } else {        /* rv 100 */
                                /* rv100 might not need 64 pix alignment, who knows */
                                /* offsets are, hmm, weird */
                                tileoffset =
                                    ((pbox[i].y1 >> 4) * depthpixperline +
                                     pbox[i].x1) >> 6;
                                nrtilesx =
                                    ((pbox[i].x2 & ~63) -
                                     (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy =
                                    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        OUT_RING(tileoffset * 128);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 4);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        }
                }

                /* TODO don't always clear all hi-level z tiles */
                if ((dev_priv->flags & RADEON_HAS_HIERZ)
                    && ((dev_priv->chip_family >= CHIP_R200) &&
                        (dev_priv->chip_family <= CHIP_RV280))
                    && (flags & RADEON_USE_HIERZ))
                        /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
                        /* FIXME : the mask supposedly contains low-res z values. So can't set
                           just to the max (0xff? or actually 0x3fff?), need to take z clear
                           value into account? */
                {
                        BEGIN_RING(4);
                        OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
                        OUT_RING(0x0);  /* First tile */
                        OUT_RING(0x3cc0);
                        OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
                        ADVANCE_RING();
                }
        }

        /* We have to clear the depth and/or stencil buffers by
         * rendering a quad into just those buffers.  Thus, we have to
         * make sure the 3D engine is configured correctly.
         */
        else if ((dev_priv->chip_family >= CHIP_R200) &&
                 (dev_priv->chip_family <= CHIP_RV280) &&
                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

                /* Snapshot of the 3D state we program for the clear
                 * quads on R200-class chips.
                 */
                int tempPP_CNTL;
                int tempRE_CNTL;
                int tempRB3D_CNTL;
                int tempRB3D_ZSTENCILCNTL;
                int tempRB3D_STENCILREFMASK;
                int tempRB3D_PLANEMASK;
                int tempSE_CNTL;
                int tempSE_VTE_CNTL;
                int tempSE_VTX_FMT_0;
                int tempSE_VTX_FMT_1;
                int tempSE_VAP_CNTL;
                int tempRE_AUX_SCISSOR_CNTL;

                tempPP_CNTL = 0;
                tempRE_CNTL = 0;

                tempRB3D_CNTL = depth_clear->rb3d_cntl;

                tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
                tempRB3D_STENCILREFMASK = 0x0;

                tempSE_CNTL = depth_clear->se_cntl;

                /* Disable TCL */

                tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
                                          (0x9 <<
                                           SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

                /* Zero plane mask: the quad writes depth/stencil only,
                 * never color.
                 */
                tempRB3D_PLANEMASK = 0x0;

                tempRE_AUX_SCISSOR_CNTL = 0x0;

                tempSE_VTE_CNTL =
                    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

                /* Vertex format (X, Y, Z, W) */
                tempSE_VTX_FMT_0 =
                    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
                    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
                tempSE_VTX_FMT_1 = 0x0;

                /*
                 * Depth buffer specific enables
                 */
                if (flags & RADEON_DEPTH) {
                        /* Enable depth buffer */
                        tempRB3D_CNTL |= RADEON_Z_ENABLE;
                } else {
                        /* Disable depth buffer */
                        tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
                }

                /*
                 * Stencil buffer specific enables
                 */
                if (flags & RADEON_STENCIL) {
                        tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = clear->depth_mask;
                } else {
                        tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                            RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING(26);
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
                OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
                OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
                             tempRB3D_STENCILREFMASK);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
                OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
                OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
                OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
                OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
                OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
                OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* One rectangle (three vertices, rect-list) per
                 * cliprect; W is hardwired to 1.0 (0x3f800000).
                 */
                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        BEGIN_RING(14);
                        OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        ADVANCE_RING();
                }
        } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
                /* Pre-R200 path: same quad-rendering approach with the
                 * older register set.
                 */
                int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

                rb3d_cntl = depth_clear->rb3d_cntl;

                if (flags & RADEON_DEPTH) {
                        rb3d_cntl |= RADEON_Z_ENABLE;
                } else {
                        rb3d_cntl &= ~RADEON_Z_ENABLE;
                }

                if (flags & RADEON_STENCIL) {
                        rb3d_cntl |= RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
                } else {
                        rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                            RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING(13);
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
                OUT_RING(0x00000000);
                OUT_RING(rb3d_cntl);

                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
                OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        BEGIN_RING(15);

                        OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
                        OUT_RING(RADEON_VTX_Z_PRESENT |
                                 RADEON_VTX_PKCOLOR_PRESENT);
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  RADEON_MAOS_ENABLE |
                                  RADEON_VTX_FMT_RADEON_MODE |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        ADVANCE_RING();
                }
        }

        /* Increment the clear counter.  The client-side 3D driver must
         * wait on this value before performing the clear ioctl.  We
         * need this because the card's so damned fast...
         */
        dev_priv->sarea_priv->last_clear++;

        BEGIN_RING(4);

        RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
        RADEON_WAIT_UNTIL_IDLE();

        ADVANCE_RING();
}
1346
1347 static void radeon_cp_dispatch_swap(struct drm_device * dev)
1348 {
1349         drm_radeon_private_t *dev_priv = dev->dev_private;
1350         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1351         int nbox = sarea_priv->nbox;
1352         struct drm_clip_rect *pbox = sarea_priv->boxes;
1353         int i;
1354         RING_LOCALS;
1355         DRM_DEBUG("\n");
1356
1357         /* Do some trivial performance monitoring...
1358          */
1359         if (dev_priv->do_boxes)
1360                 radeon_cp_performance_boxes(dev_priv);
1361
1362         /* Wait for the 3D stream to idle before dispatching the bitblt.
1363          * This will prevent data corruption between the two streams.
1364          */
1365         BEGIN_RING(2);
1366
1367         RADEON_WAIT_UNTIL_3D_IDLE();
1368
1369         ADVANCE_RING();
1370
1371         for (i = 0; i < nbox; i++) {
1372                 int x = pbox[i].x1;
1373                 int y = pbox[i].y1;
1374                 int w = pbox[i].x2 - x;
1375                 int h = pbox[i].y2 - y;
1376
1377                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1378
1379                 BEGIN_RING(9);
1380
1381                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1382                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1383                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1384                          RADEON_GMC_BRUSH_NONE |
1385                          (dev_priv->color_fmt << 8) |
1386                          RADEON_GMC_SRC_DATATYPE_COLOR |
1387                          RADEON_ROP3_S |
1388                          RADEON_DP_SRC_SOURCE_MEMORY |
1389                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1390
1391                 /* Make this work even if front & back are flipped:
1392                  */
1393                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1394                 if (dev_priv->sarea_priv->pfCurrentPage == 0) {
1395                         OUT_RING(dev_priv->back_pitch_offset);
1396                         OUT_RING(dev_priv->front_pitch_offset);
1397                 } else {
1398                         OUT_RING(dev_priv->front_pitch_offset);
1399                         OUT_RING(dev_priv->back_pitch_offset);
1400                 }
1401
1402                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1403                 OUT_RING((x << 16) | y);
1404                 OUT_RING((x << 16) | y);
1405                 OUT_RING((w << 16) | h);
1406
1407                 ADVANCE_RING();
1408         }
1409
1410         /* Increment the frame counter.  The client-side 3D driver must
1411          * throttle the framerate by waiting for this value before
1412          * performing the swapbuffer ioctl.
1413          */
1414         dev_priv->sarea_priv->last_frame++;
1415
1416         BEGIN_RING(4);
1417
1418         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1419         RADEON_WAIT_UNTIL_2D_IDLE();
1420
1421         ADVANCE_RING();
1422 }
1423
1424 static void radeon_cp_dispatch_flip(struct drm_device * dev)
1425 {
1426         drm_radeon_private_t *dev_priv = dev->dev_private;
1427         struct drm_sarea *sarea = (struct drm_sarea *) dev_priv->sarea->handle;
1428         int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
1429             ? dev_priv->front_offset : dev_priv->back_offset;
1430         RING_LOCALS;
1431         DRM_DEBUG("pfCurrentPage=%d\n",
1432                   dev_priv->sarea_priv->pfCurrentPage);
1433
1434         /* Do some trivial performance monitoring...
1435          */
1436         if (dev_priv->do_boxes) {
1437                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1438                 radeon_cp_performance_boxes(dev_priv);
1439         }
1440
1441         /* Update the frame offsets for both CRTCs
1442          */
1443         BEGIN_RING(6);
1444
1445         RADEON_WAIT_UNTIL_3D_IDLE();
1446         OUT_RING_REG(RADEON_CRTC_OFFSET,
1447                      ((sarea->frame.y * dev_priv->front_pitch +
1448                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1449                      + offset);
1450         OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1451                      + offset);
1452
1453         ADVANCE_RING();
1454
1455         /* Increment the frame counter.  The client-side 3D driver must
1456          * throttle the framerate by waiting for this value before
1457          * performing the swapbuffer ioctl.
1458          */
1459         dev_priv->sarea_priv->last_frame++;
1460         dev_priv->sarea_priv->pfCurrentPage =
1461                 1 - dev_priv->sarea_priv->pfCurrentPage;
1462
1463         BEGIN_RING(2);
1464
1465         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1466
1467         ADVANCE_RING();
1468 }
1469
1470 static int bad_prim_vertex_nr(int primitive, int nr)
1471 {
1472         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1473         case RADEON_PRIM_TYPE_NONE:
1474         case RADEON_PRIM_TYPE_POINT:
1475                 return nr < 1;
1476         case RADEON_PRIM_TYPE_LINE:
1477                 return (nr & 1) || nr == 0;
1478         case RADEON_PRIM_TYPE_LINE_STRIP:
1479                 return nr < 2;
1480         case RADEON_PRIM_TYPE_TRI_LIST:
1481         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1482         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1483         case RADEON_PRIM_TYPE_RECT_LIST:
1484                 return nr % 3 || nr == 0;
1485         case RADEON_PRIM_TYPE_TRI_FAN:
1486         case RADEON_PRIM_TYPE_TRI_STRIP:
1487                 return nr < 3;
1488         default:
1489                 return 1;
1490         }
1491 }
1492
/* Description of one TCL primitive to dispatch; start/finish are byte
 * offsets into the associated DMA buffer.
 */
typedef struct {
	unsigned int start;	/* first byte of primitive data in the buffer */
	unsigned int finish;	/* end byte offset (exclusive) */
	unsigned int prim;	/* RADEON_PRIM_TYPE_* plus flag bits */
	unsigned int numverts;	/* number of vertices to render */
	unsigned int offset;	/* vertex data offset; used by the indexed path */
	unsigned int vc_format;	/* hardware vertex format word */
} drm_radeon_tcl_prim_t;
1501
1502 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1503                                       struct drm_buf * buf,
1504                                       drm_radeon_tcl_prim_t * prim)
1505 {
1506         drm_radeon_private_t *dev_priv = dev->dev_private;
1507         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1508         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1509         int numverts = (int)prim->numverts;
1510         int nbox = sarea_priv->nbox;
1511         int i = 0;
1512         RING_LOCALS;
1513
1514         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1515                   prim->prim,
1516                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1517
1518         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1519                 DRM_ERROR("bad prim %x numverts %d\n",
1520                           prim->prim, prim->numverts);
1521                 return;
1522         }
1523
1524         do {
1525                 /* Emit the next cliprect */
1526                 if (i < nbox) {
1527                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1528                 }
1529
1530                 /* Emit the vertex buffer rendering commands */
1531                 BEGIN_RING(5);
1532
1533                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1534                 OUT_RING(offset);
1535                 OUT_RING(numverts);
1536                 OUT_RING(prim->vc_format);
1537                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1538                          RADEON_COLOR_ORDER_RGBA |
1539                          RADEON_VTX_FMT_RADEON_MODE |
1540                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1541
1542                 ADVANCE_RING();
1543
1544                 i++;
1545         } while (i < nbox);
1546 }
1547
1548 static void radeon_cp_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
1549 {
1550         drm_radeon_private_t *dev_priv = dev->dev_private;
1551         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1552         RING_LOCALS;
1553
1554         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1555
1556         /* Emit the vertex buffer age */
1557         BEGIN_RING(2);
1558         RADEON_DISPATCH_AGE(buf_priv->age);
1559         ADVANCE_RING();
1560
1561         buf->pending = 1;
1562         buf->used = 0;
1563 }
1564
1565 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1566                                         struct drm_buf * buf, int start, int end)
1567 {
1568         drm_radeon_private_t *dev_priv = dev->dev_private;
1569         RING_LOCALS;
1570         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1571
1572         if (start != end) {
1573                 int offset = (dev_priv->gart_buffers_offset
1574                               + buf->offset + start);
1575                 int dwords = (end - start + 3) / sizeof(u32);
1576
1577                 /* Indirect buffer data must be an even number of
1578                  * dwords, so if we've been given an odd number we must
1579                  * pad the data with a Type-2 CP packet.
1580                  */
1581                 if (dwords & 1) {
1582                         u32 *data = (u32 *)
1583                             ((char *)dev->agp_buffer_map->handle
1584                              + buf->offset + start);
1585                         data[dwords++] = RADEON_CP_PACKET2;
1586                 }
1587
1588                 /* Fire off the indirect buffer */
1589                 BEGIN_RING(3);
1590
1591                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1592                 OUT_RING(offset);
1593                 OUT_RING(dwords);
1594
1595                 ADVANCE_RING();
1596         }
1597 }
1598
1599 static void radeon_cp_dispatch_indices(struct drm_device * dev,
1600                                        struct drm_buf * elt_buf,
1601                                        drm_radeon_tcl_prim_t * prim)
1602 {
1603         drm_radeon_private_t *dev_priv = dev->dev_private;
1604         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1605         int offset = dev_priv->gart_buffers_offset + prim->offset;
1606         u32 *data;
1607         int dwords;
1608         int i = 0;
1609         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1610         int count = (prim->finish - start) / sizeof(u16);
1611         int nbox = sarea_priv->nbox;
1612
1613         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1614                   prim->prim,
1615                   prim->vc_format,
1616                   prim->start, prim->finish, prim->offset, prim->numverts);
1617
1618         if (bad_prim_vertex_nr(prim->prim, count)) {
1619                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1620                 return;
1621         }
1622
1623         if (start >= prim->finish || (prim->start & 0x7)) {
1624                 DRM_ERROR("buffer prim %d\n", prim->prim);
1625                 return;
1626         }
1627
1628         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1629
1630         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1631                         elt_buf->offset + prim->start);
1632
1633         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1634         data[1] = offset;
1635         data[2] = prim->numverts;
1636         data[3] = prim->vc_format;
1637         data[4] = (prim->prim |
1638                    RADEON_PRIM_WALK_IND |
1639                    RADEON_COLOR_ORDER_RGBA |
1640                    RADEON_VTX_FMT_RADEON_MODE |
1641                    (count << RADEON_NUM_VERTICES_SHIFT));
1642
1643         do {
1644                 if (i < nbox)
1645                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1646
1647                 radeon_cp_dispatch_indirect(dev, elt_buf,
1648                                             prim->start, prim->finish);
1649
1650                 i++;
1651         } while (i < nbox);
1652
1653 }
1654
/* Maximum number of texture bytes moved per blit pass; larger uploads
 * are split into multiple passes in the loop below.
 */
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

/* Upload a texture image from user space into video memory via the 2D
 * blitter, splitting oversized uploads into multiple passes.
 *
 * tex describes the destination (offset, pitch, format); image
 * describes the user-space source rectangle and is updated in place
 * after each pass so a partial upload can be resumed by the client.
 *
 * Returns 0 on success, -EINVAL for bad parameters, -EFAULT on user
 * copy failure, or -EAGAIN when no DMA buffer is available (after
 * writing the current image state back to user space for retry).
 */
static int radeon_cp_dispatch_texture(struct drm_device * dev,
				      struct drm_file *file_priv,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_buf *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset, byte_offset;
	RING_LOCALS;

	/* Validate (and relocate if necessary) the destination offset
	 * supplied by the client.
	 */
	if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return -EINVAL;
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		/* 4 bytes per texel. */
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		/* 2 bytes per texel. */
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		/* 1 byte per texel. */
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return -EINVAL;
	}
	/* Source pitch in units of 64 bytes; a zero pitch only makes
	 * sense for a single-row image.
	 */
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return -EINVAL;

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	/* this might fail for zero-sized uploads - are those illegal? */
	if (!radeon_check_offset(dev_priv, tex->offset + image->height *
				blit_width - 1)) {
		DRM_ERROR("Invalid final destination offset\n");
		return -EINVAL;
	}

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	/* One pass per iteration; image->y/height/data are advanced at
	 * the bottom until the whole image has been uploaded.
	 */
	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			/* Clamp this pass to the maximum blit size. */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		/* NOTE(review): the "0 &&" disables this idle-and-retry
		 * path entirely, so a full freelist goes straight to the
		 * -EAGAIN return below — confirm this is intentional.
		 */
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("EAGAIN\n");
			/* Hand the updated image state back so user
			 * space can retry exactly where we stopped.
			 */
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return -EFAULT;
			return -EAGAIN;
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

/* Copy _width bytes from user space into the staging buffer, bailing
 * out of the ioctl with -EFAULT on failure.
 */
#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return -EFAULT; \
		} \
	} while(0)

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						(int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				/* Pad each scanline out to 16 bytes. */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		/* The destination y is wrapped modulo 2048; fold the
		 * overflow into a byte offset added to the dst pointer.
		 */
		byte_offset = (image->y & ~2047) * blit_width;
		buf->file_priv = file_priv;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
		OUT_RING(0);
		OUT_RING((image->x << 16) | (image->y % 2048));
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		COMMIT_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	COMMIT_RING();

	return 0;
}
1899
1900 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1901 {
1902         drm_radeon_private_t *dev_priv = dev->dev_private;
1903         int i;
1904         RING_LOCALS;
1905         DRM_DEBUG("\n");
1906
1907         BEGIN_RING(35);
1908
1909         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1910         OUT_RING(0x00000000);
1911
1912         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1913         for (i = 0; i < 32; i++) {
1914                 OUT_RING(stipple[i]);
1915         }
1916
1917         ADVANCE_RING();
1918 }
1919
1920 static void radeon_apply_surface_regs(int surf_index,
1921                                       drm_radeon_private_t *dev_priv)
1922 {
1923         if (!dev_priv->mmio)
1924                 return;
1925
1926         radeon_do_cp_idle(dev_priv);
1927
1928         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1929                      dev_priv->surfaces[surf_index].flags);
1930         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1931                      dev_priv->surfaces[surf_index].lower);
1932         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1933                      dev_priv->surfaces[surf_index].upper);
1934 }
1935
1936 /* Allocates a virtual surface
1937  * doesn't always allocate a real surface, will stretch an existing
1938  * surface when possible.
1939  *
1940  * Note that refcount can be at most 2, since during a free refcount=3
1941  * might mean we have to allocate a new surface which might not always
1942  * be available.
1943  * For example : we allocate three contigous surfaces ABC. If B is
1944  * freed, we suddenly need two surfaces to store A and C, which might
1945  * not always be available.
1946  */
1947 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1948                          drm_radeon_private_t *dev_priv,
1949                          struct drm_file *file_priv)
1950 {
1951         struct radeon_virt_surface *s;
1952         int i;
1953         int virt_surface_index;
1954         uint32_t new_upper, new_lower;
1955
1956         new_lower = new->address;
1957         new_upper = new_lower + new->size - 1;
1958
1959         /* sanity check */
1960         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1961             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1962              RADEON_SURF_ADDRESS_FIXED_MASK)
1963             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1964                 return -1;
1965
1966         /* make sure there is no overlap with existing surfaces */
1967         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1968                 if ((dev_priv->surfaces[i].refcount != 0) &&
1969                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1970                       (new_lower < dev_priv->surfaces[i].upper)) ||
1971                      ((new_lower < dev_priv->surfaces[i].lower) &&
1972                       (new_upper > dev_priv->surfaces[i].lower)))) {
1973                         return -1;
1974                 }
1975         }
1976
1977         /* find a virtual surface */
1978         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1979                 if (dev_priv->virt_surfaces[i].file_priv == 0)
1980                         break;
1981         if (i == 2 * RADEON_MAX_SURFACES) {
1982                 return -1;
1983         }
1984         virt_surface_index = i;
1985
1986         /* try to reuse an existing surface */
1987         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1988                 /* extend before */
1989                 if ((dev_priv->surfaces[i].refcount == 1) &&
1990                     (new->flags == dev_priv->surfaces[i].flags) &&
1991                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1992                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1993                         s->surface_index = i;
1994                         s->lower = new_lower;
1995                         s->upper = new_upper;
1996                         s->flags = new->flags;
1997                         s->file_priv = file_priv;
1998                         dev_priv->surfaces[i].refcount++;
1999                         dev_priv->surfaces[i].lower = s->lower;
2000                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2001                         return virt_surface_index;
2002                 }
2003
2004                 /* extend after */
2005                 if ((dev_priv->surfaces[i].refcount == 1) &&
2006                     (new->flags == dev_priv->surfaces[i].flags) &&
2007                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
2008                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2009                         s->surface_index = i;
2010                         s->lower = new_lower;
2011                         s->upper = new_upper;
2012                         s->flags = new->flags;
2013                         s->file_priv = file_priv;
2014                         dev_priv->surfaces[i].refcount++;
2015                         dev_priv->surfaces[i].upper = s->upper;
2016                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2017                         return virt_surface_index;
2018                 }
2019         }
2020
2021         /* okay, we need a new one */
2022         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2023                 if (dev_priv->surfaces[i].refcount == 0) {
2024                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2025                         s->surface_index = i;
2026                         s->lower = new_lower;
2027                         s->upper = new_upper;
2028                         s->flags = new->flags;
2029                         s->file_priv = file_priv;
2030                         dev_priv->surfaces[i].refcount = 1;
2031                         dev_priv->surfaces[i].lower = s->lower;
2032                         dev_priv->surfaces[i].upper = s->upper;
2033                         dev_priv->surfaces[i].flags = s->flags;
2034                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2035                         return virt_surface_index;
2036                 }
2037         }
2038
2039         /* we didn't find anything */
2040         return -1;
2041 }
2042
2043 static int free_surface(struct drm_file *file_priv,
2044                         drm_radeon_private_t * dev_priv,
2045                         int lower)
2046 {
2047         struct radeon_virt_surface *s;
2048         int i;
2049         /* find the virtual surface */
2050         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2051                 s = &(dev_priv->virt_surfaces[i]);
2052                 if (s->file_priv) {
2053                         if ((lower == s->lower) && (file_priv == s->file_priv))
2054                         {
2055                                 if (dev_priv->surfaces[s->surface_index].
2056                                     lower == s->lower)
2057                                         dev_priv->surfaces[s->surface_index].
2058                                             lower = s->upper;
2059
2060                                 if (dev_priv->surfaces[s->surface_index].
2061                                     upper == s->upper)
2062                                         dev_priv->surfaces[s->surface_index].
2063                                             upper = s->lower;
2064
2065                                 dev_priv->surfaces[s->surface_index].refcount--;
2066                                 if (dev_priv->surfaces[s->surface_index].
2067                                     refcount == 0)
2068                                         dev_priv->surfaces[s->surface_index].
2069                                             flags = 0;
2070                                 s->file_priv = NULL;
2071                                 radeon_apply_surface_regs(s->surface_index,
2072                                                           dev_priv);
2073                                 return 0;
2074                         }
2075                 }
2076         }
2077         return 1;
2078 }
2079
2080 static void radeon_surfaces_release(struct drm_file *file_priv,
2081                                     drm_radeon_private_t * dev_priv)
2082 {
2083         int i;
2084         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2085                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2086                         free_surface(file_priv, dev_priv,
2087                                      dev_priv->virt_surfaces[i].lower);
2088         }
2089 }
2090
2091 /* ================================================================
2092  * IOCTL functions
2093  */
2094 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2095 {
2096         drm_radeon_private_t *dev_priv = dev->dev_private;
2097         drm_radeon_surface_alloc_t *alloc = data;
2098
2099         if (!dev_priv) {
2100                 DRM_ERROR("called with no initialization\n");
2101                 return -EINVAL;
2102         }
2103
2104         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2105                 return -EINVAL;
2106         else
2107                 return 0;
2108 }
2109
2110 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2111 {
2112         drm_radeon_private_t *dev_priv = dev->dev_private;
2113         drm_radeon_surface_free_t *memfree = data;
2114
2115         if (!dev_priv) {
2116                 DRM_ERROR("called with no initialization\n");
2117                 return -EINVAL;
2118         }
2119
2120         if (free_surface(file_priv, dev_priv, memfree->address))
2121                 return -EINVAL;
2122         else
2123                 return 0;
2124 }
2125
2126 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2127 {
2128         drm_radeon_private_t *dev_priv = dev->dev_private;
2129         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2130         drm_radeon_clear_t *clear = data;
2131         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2132         DRM_DEBUG("\n");
2133
2134         LOCK_TEST_WITH_RETURN(dev, file_priv);
2135
2136         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2137
2138         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2139                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2140
2141         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2142                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2143                 return -EFAULT;
2144
2145         radeon_cp_dispatch_clear(dev, clear, depth_boxes);
2146
2147         COMMIT_RING();
2148         return 0;
2149 }
2150
/* Not sure why this isn't set all the time:
 */
/* Enable CRTC page flipping: sets the flip-control bit on both CRTC
 * offset registers via the CP and records that flipping is active.
 * Always returns 0.
 */
static int radeon_do_init_pageflip(struct drm_device * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* read-modify-write both CRTC offset control registers so that
	 * subsequent offset updates take effect as page flips */
	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;

	/* start on page 0 unless the SAREA already says we are on page 1 */
	if (dev_priv->sarea_priv->pfCurrentPage != 1)
		dev_priv->sarea_priv->pfCurrentPage = 0;

	return 0;
}
2177
/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
/* IOCTL: perform a page flip, lazily enabling flip mode on first use. */
static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* first flip request turns page flipping on */
	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}
2198
2199 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2200 {
2201         drm_radeon_private_t *dev_priv = dev->dev_private;
2202         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2203         DRM_DEBUG("\n");
2204
2205         LOCK_TEST_WITH_RETURN(dev, file_priv);
2206
2207         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2208
2209         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2210                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2211
2212         radeon_cp_dispatch_swap(dev);
2213         dev_priv->sarea_priv->ctx_owner = 0;
2214
2215         COMMIT_RING();
2216         return 0;
2217 }
2218
/* IOCTL: draw a vertex-array primitive out of a client-owned DMA
 * buffer, flushing any dirty client state (except cliprects) first.
 */
static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_vertex_t *vertex = data;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	sarea_priv = dev_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);

	/* validate the client-supplied buffer index and primitive type */
	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}
	if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex->idx];

	/* clients may only draw out of buffers they own */
	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	/* Build up a prim_t record:
	 */
	if (vertex->count) {
		buf->used = vertex->count;	/* not used? */

		/* flush dirty state (except cliprects) before drawing */
		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, file_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return -EINVAL;
			}

			/* these bits were satisfied by radeon_emit_state() */
			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex->count;	/* unused */
		prim.prim = vertex->prim;
		prim.numverts = vertex->count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	/* hand the buffer back once dispatched (see radeon_cp_discard_buffer) */
	if (vertex->discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2301
/* IOCTL: draw an indexed primitive out of a client-owned DMA buffer.
 * 'start'/'end' delimit the u16 index data within the buffer; an
 * index-primitive packet header precedes 'start' by
 * RADEON_INDEX_PRIM_OFFSET bytes.
 */
static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_indices_t *elts = data;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}
	sarea_priv = dev_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
		  elts->discard);

	/* validate the client-supplied buffer index and primitive type */
	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts->idx, dma->buf_count - 1);
		return -EINVAL;
	}
	if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts->idx];

	/* clients may only draw out of buffers they own */
	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts->idx);
		return -EINVAL;
	}

	/* NOTE(review): 'count' is computed here but never used below */
	count = (elts->end - elts->start) / sizeof(u16);
	/* rewind 'start' to include the index-primitive packet header */
	elts->start -= RADEON_INDEX_PRIM_OFFSET;

	/* the header must be 8-byte aligned and must not overlap data
	 * already queued in this buffer */
	if (elts->start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
		return -EINVAL;
	}
	if (elts->start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
		return -EINVAL;
	}

	buf->used = elts->end;

	/* flush dirty client state (except cliprects) before drawing */
	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, file_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return -EINVAL;
		}

		/* these bits were satisfied by radeon_emit_state() */
		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts->start;
	prim.finish = elts->end;
	prim.prim = elts->prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = dev_priv->sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, buf, &prim);
	if (elts->discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2395
2396 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2397 {
2398         drm_radeon_private_t *dev_priv = dev->dev_private;
2399         drm_radeon_texture_t *tex = data;
2400         drm_radeon_tex_image_t image;
2401         int ret;
2402
2403         LOCK_TEST_WITH_RETURN(dev, file_priv);
2404
2405         if (tex->image == NULL) {
2406                 DRM_ERROR("null texture image!\n");
2407                 return -EINVAL;
2408         }
2409
2410         if (DRM_COPY_FROM_USER(&image,
2411                                (drm_radeon_tex_image_t __user *) tex->image,
2412                                sizeof(image)))
2413                 return -EFAULT;
2414
2415         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2416         VB_AGE_TEST_WITH_RETURN(dev_priv);
2417
2418         ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2419
2420         return ret;
2421 }
2422
2423 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2424 {
2425         drm_radeon_private_t *dev_priv = dev->dev_private;
2426         drm_radeon_stipple_t *stipple = data;
2427         u32 mask[32];
2428
2429         LOCK_TEST_WITH_RETURN(dev, file_priv);
2430
2431         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2432                 return -EFAULT;
2433
2434         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2435
2436         radeon_cp_dispatch_stipple(dev, mask);
2437
2438         COMMIT_RING();
2439         return 0;
2440 }
2441
/* IOCTL: dispatch a client-filled indirect buffer of raw CP commands.
 * The contents are not verified here (see the comment before the
 * dispatch call), so the ioctl table is expected to restrict this to
 * trusted clients such as the X server — the check is not visible in
 * this file section.
 */
static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_indirect_t *indirect = data;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
		  indirect->idx, indirect->start, indirect->end,
		  indirect->discard);

	/* validate the client-supplied buffer index */
	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	buf = dma->buflist[indirect->idx];

	/* clients may only dispatch buffers they own */
	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
		return -EINVAL;
	}

	/* new commands must start after what was already queued */
	if (indirect->start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect->start, buf->used);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* NOTE(review): indirect->end is not range-checked against the
	 * buffer's total size here — presumably bounded elsewhere; confirm */
	buf->used = indirect->end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
	if (indirect->discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2511
/* IOCTL: newer vertex-submission path.  A single DMA buffer carries
 * several primitives, each referencing one of an array of user-space
 * state blocks; state is (re)emitted only when the state index changes
 * between consecutive primitives.
 */
static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_vertex2_t *vertex = data;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	sarea_priv = dev_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->discard);

	/* validate the client-supplied buffer index */
	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex->idx];

	/* clients may only draw out of buffers they own */
	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return -EINVAL;

	/* 0xff is a sentinel so the first primitive normally emits state */
	for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		/* each primitive descriptor is copied from user space */
		if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
			return -EFAULT;

		/* emit the referenced state block only when it changes */
		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex->state[prim.stateidx],
					       sizeof(state)))
				return -EFAULT;

			if (radeon_emit_state2(dev_priv, file_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return -EINVAL;
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			/* indices follow the vertex data — assumes 64
			 * bytes per vertex; TODO confirm */
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		/* a single cliprect is consumed by the first primitive */
		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex->discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2610
/* Emit one state packet from a command buffer: a PACKET0 burst write
 * of 'sz' dwords starting at register 'reg', both taken from the
 * global packet[] table indexed by the command header's packet_id.
 * Consumes the payload from cmdbuf on success.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       struct drm_file *file_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	/* the id indexes the packet[] table; bound it first */
	if (id >= RADEON_MAX_STATE_PACKETS)
		return -EINVAL;

	sz = packet[id].len;
	reg = packet[id].start;

	/* the payload must fit in the remaining command buffer */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return -EINVAL;
	}

	/* verify (and presumably fix up offsets in) the register writes —
	 * see radeon_check_and_fixup_packets() */
	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return -EINVAL;
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	/* consume the emitted payload from the command stream */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2646
2647 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2648                                           drm_radeon_cmd_header_t header,
2649                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2650 {
2651         int sz = header.scalars.count;
2652         int start = header.scalars.offset;
2653         int stride = header.scalars.stride;
2654         RING_LOCALS;
2655
2656         BEGIN_RING(3 + sz);
2657         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2658         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2659         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2660         OUT_RING_TABLE(cmdbuf->buf, sz);
2661         ADVANCE_RING();
2662         cmdbuf->buf += sz * sizeof(int);
2663         cmdbuf->bufsz -= sz * sizeof(int);
2664         return 0;
2665 }
2666
2667 /* God this is ugly
2668  */
2669 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2670                                            drm_radeon_cmd_header_t header,
2671                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2672 {
2673         int sz = header.scalars.count;
2674         int start = ((unsigned int)header.scalars.offset) + 0x100;
2675         int stride = header.scalars.stride;
2676         RING_LOCALS;
2677
2678         BEGIN_RING(3 + sz);
2679         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2680         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2681         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2682         OUT_RING_TABLE(cmdbuf->buf, sz);
2683         ADVANCE_RING();
2684         cmdbuf->buf += sz * sizeof(int);
2685         cmdbuf->bufsz -= sz * sizeof(int);
2686         return 0;
2687 }
2688
2689 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2690                                           drm_radeon_cmd_header_t header,
2691                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2692 {
2693         int sz = header.vectors.count;
2694         int start = header.vectors.offset;
2695         int stride = header.vectors.stride;
2696         RING_LOCALS;
2697
2698         BEGIN_RING(5 + sz);
2699         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2700         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2701         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2702         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2703         OUT_RING_TABLE(cmdbuf->buf, sz);
2704         ADVANCE_RING();
2705
2706         cmdbuf->buf += sz * sizeof(int);
2707         cmdbuf->bufsz -= sz * sizeof(int);
2708         return 0;
2709 }
2710
/* Emit a linear block of TCL vector data.  The count field is in
 * 4-dword vectors; the target index is assembled from addr_lo/addr_hi.
 * Consumes the payload from cmdbuf on success.
 */
static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.veclinear.count * 4;	/* payload size in dwords */
	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	/* empty write: nothing to emit, nothing to consume */
	if (!sz)
		return 0;
	/* payload must fit in the remaining command buffer.
	 * NOTE(review): the check uses sz * 4 while the advance below
	 * uses sz * sizeof(int) — equivalent where int is 32-bit. */
	if (sz * 4 > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	/* consume the emitted payload from the command stream */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2736
/* Verify and emit a single client-supplied PACKET3 command, then
 * consume it from the command stream.
 */
static int radeon_emit_packet3(struct drm_device * dev,
			       struct drm_file *file_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* verifies (and presumably fixes up) the packet and reports its
	 * length in dwords via cmdsz */
	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	/* consume cmdsz dwords (4 bytes each) from the command stream */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2762
/* Verify a client-supplied PACKET3 command and replay it once per
 * cliprect, emitting the matching scissor state before each replay.
 * The packet is consumed from the command stream even when no
 * cliprects are present.
 */
static int radeon_emit_packet3_cliprect(struct drm_device *dev,
					struct drm_file *file_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_clip_rect box;
	unsigned int cmdsz;
	int ret;
	struct drm_clip_rect __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* verify the packet; cmdsz receives its length in dwords */
	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	/* no cliprects: skip emission but still advance the stream */
	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return -EFAULT;
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	/* a lone cliprect is consumed by the first packet of a cmdbuf */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	/* consume the packet from the command stream */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2824
2825 static int radeon_emit_wait(struct drm_device * dev, int flags)
2826 {
2827         drm_radeon_private_t *dev_priv = dev->dev_private;
2828         RING_LOCALS;
2829
2830         DRM_DEBUG("%x\n", flags);
2831         switch (flags) {
2832         case RADEON_WAIT_2D:
2833                 BEGIN_RING(2);
2834                 RADEON_WAIT_UNTIL_2D_IDLE();
2835                 ADVANCE_RING();
2836                 break;
2837         case RADEON_WAIT_3D:
2838                 BEGIN_RING(2);
2839                 RADEON_WAIT_UNTIL_3D_IDLE();
2840                 ADVANCE_RING();
2841                 break;
2842         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2843                 BEGIN_RING(2);
2844                 RADEON_WAIT_UNTIL_IDLE();
2845                 ADVANCE_RING();
2846                 break;
2847         default:
2848                 return -EINVAL;
2849         }
2850
2851         return 0;
2852 }
2853
/* DRM_RADEON_CMDBUF ioctl handler: execute a client-supplied command buffer.
 *
 * The user buffer is copied into kernel memory up front so its contents
 * cannot change between validation and use.  On R300-class chips the whole
 * buffer is handed to the r300 parser (r300_do_cp_cmdbuf); on older chips
 * each drm_radeon_cmd_header_t is decoded and dispatched in the loop below.
 *
 * Returns 0 on success, -EINVAL on a malformed buffer or missing init,
 * -ENOMEM / -EFAULT on allocation or copy-in failure.
 */
static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t *cmdbuf = data;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;	/* kernel-side copy of the user buffer */

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (!dev_priv) {
		DRM_ERROR("called with no initialization\n");
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* Reject negative or oversized (> 64KB) buffers before allocating. */
	if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
		return -EINVAL;
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf->bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return -ENOMEM;
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
				       cmdbuf->bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return -EFAULT;
		}
		/* From here on cmdbuf->buf walks the kernel copy, not user
		 * memory; kbuf/orig_bufsz keep the base/size for freeing. */
		cmdbuf->buf = kbuf;
	}

	orig_nbox = cmdbuf->nbox;

	/* R300 and newer use a separate, stricter command-stream parser. */
	if (dev_priv->chip_family >= CHIP_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	while (cmdbuf->bufsz >= sizeof(header)) {

		/* Consume one command header, then dispatch on its type. */
		header.i = *(int *)cmdbuf->buf;
		cmdbuf->buf += sizeof(header);
		cmdbuf->bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, file_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			/* Validate index and ownership before discarding:
			 * the buffer must belong to this file and not
			 * already be pending on the hardware. */
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			buf = dma->buflist[idx];
			if (buf->file_priv != file_priv || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->file_priv, file_priv,
					  buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, file_priv, cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		case RADEON_CMD_VECLINEAR:
			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
			if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_veclinear failed\n");
				goto err;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf->buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

	/* Any malformed command aborts the whole buffer: free the kernel
	 * copy and report -EINVAL. */
      err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return -EINVAL;
}
3021
3022 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3023 {
3024         drm_radeon_private_t *dev_priv = dev->dev_private;
3025         drm_radeon_getparam_t *param = data;
3026         int value;
3027
3028         if (!dev_priv) {
3029                 DRM_ERROR("called with no initialization\n");
3030                 return -EINVAL;
3031         }
3032
3033         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3034
3035         switch (param->param) {
3036         case RADEON_PARAM_GART_BUFFER_OFFSET:
3037                 value = dev_priv->gart_buffers_offset;
3038                 break;
3039         case RADEON_PARAM_LAST_FRAME:
3040                 dev_priv->stats.last_frame_reads++;
3041                 value = GET_SCRATCH(0);
3042                 break;
3043         case RADEON_PARAM_LAST_DISPATCH:
3044                 value = GET_SCRATCH(1);
3045                 break;
3046         case RADEON_PARAM_LAST_CLEAR:
3047                 dev_priv->stats.last_clear_reads++;
3048                 value = GET_SCRATCH(2);
3049                 break;
3050         case RADEON_PARAM_IRQ_NR:
3051                 value = dev->irq;
3052                 break;
3053         case RADEON_PARAM_GART_BASE:
3054                 value = dev_priv->gart_vm_start;
3055                 break;
3056         case RADEON_PARAM_REGISTER_HANDLE:
3057                 value = dev_priv->mmio->offset;
3058                 break;
3059         case RADEON_PARAM_STATUS_HANDLE:
3060                 value = dev_priv->ring_rptr_offset;
3061                 break;
3062 #ifndef __LP64__
3063                 /*
3064                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3065                  * pointer which can't fit into an int-sized variable.  According to
3066                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3067                  * not supporting it shouldn't be a problem.  If the same functionality
3068                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3069                  * so backwards-compatibility for the embedded platforms can be
3070                  * maintained.  --davidm 4-Feb-2004.
3071                  */
3072         case RADEON_PARAM_SAREA_HANDLE:
3073                 /* The lock is the first dword in the sarea. */
3074                 value = (long)dev->lock.hw_lock;
3075                 break;
3076 #endif
3077         case RADEON_PARAM_GART_TEX_HANDLE:
3078                 value = dev_priv->gart_textures_offset;
3079                 break;
3080         case RADEON_PARAM_SCRATCH_OFFSET:
3081                 if (!dev_priv->writeback_works)
3082                         return -EINVAL;
3083                 value = RADEON_SCRATCH_REG_OFFSET;
3084                 break;
3085
3086         case RADEON_PARAM_CARD_TYPE:
3087                 if (dev_priv->flags & RADEON_IS_PCIE)
3088                         value = RADEON_CARD_PCIE;
3089                 else if (dev_priv->flags & RADEON_IS_AGP)
3090                         value = RADEON_CARD_AGP;
3091                 else
3092                         value = RADEON_CARD_PCI;
3093                 break;
3094         case RADEON_PARAM_VBLANK_CRTC:
3095                 value = radeon_vblank_crtc_get(dev);
3096                 break;
3097         case RADEON_PARAM_FB_LOCATION:
3098                 value = radeon_read_fb_location(dev_priv);
3099                 break;
3100         case RADEON_PARAM_NUM_GB_PIPES:
3101                 value = dev_priv->num_gb_pipes;
3102                 break;
3103         default:
3104                 DRM_DEBUG( "Invalid parameter %d\n", param->param );
3105                 return -EINVAL;
3106         }
3107
3108         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3109                 DRM_ERROR("copy_to_user\n");
3110                 return -EFAULT;
3111         }
3112
3113         return 0;
3114 }
3115
3116 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3117 {
3118         drm_radeon_private_t *dev_priv = dev->dev_private;
3119         drm_radeon_setparam_t *sp = data;
3120         struct drm_radeon_driver_file_fields *radeon_priv;
3121
3122         if (!dev_priv) {
3123                 DRM_ERROR("called with no initialization\n");
3124                 return -EINVAL;
3125         }
3126
3127         switch (sp->param) {
3128         case RADEON_SETPARAM_FB_LOCATION:
3129                 radeon_priv = file_priv->driver_priv;
3130                 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3131                     sp->value;
3132                 break;
3133         case RADEON_SETPARAM_SWITCH_TILING:
3134                 if (sp->value == 0) {
3135                         DRM_DEBUG("color tiling disabled\n");
3136                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3137                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3138                         if (dev_priv->sarea_priv)
3139                                 dev_priv->sarea_priv->tiling_enabled = 0;
3140                 } else if (sp->value == 1) {
3141                         DRM_DEBUG("color tiling enabled\n");
3142                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3143                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3144                         if (dev_priv->sarea_priv)
3145                                 dev_priv->sarea_priv->tiling_enabled = 1;
3146                 }
3147                 break;
3148         case RADEON_SETPARAM_PCIGART_LOCATION:
3149                 dev_priv->pcigart_offset = sp->value;
3150                 dev_priv->pcigart_offset_set = 1;
3151                 break;
3152         case RADEON_SETPARAM_NEW_MEMMAP:
3153                 dev_priv->new_memmap = sp->value;
3154                 break;
3155         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3156                 dev_priv->gart_info.table_size = sp->value;
3157                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3158                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3159                 break;
3160         case RADEON_SETPARAM_VBLANK_CRTC:
3161                 return radeon_vblank_crtc_set(dev, sp->value);
3162                 break;
3163         default:
3164                 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3165                 return -EINVAL;
3166         }
3167
3168         return 0;
3169 }
3170
3171 /* When a client dies:
3172  *    - Check for and clean up flipped page state
3173  *    - Free any alloced GART memory.
3174  *    - Free any alloced radeon surfaces.
3175  *
3176  * DRM infrastructure takes care of reclaiming dma buffers.
3177  */
3178 void radeon_driver_preclose(struct drm_device *dev,
3179                             struct drm_file *file_priv)
3180 {
3181         if (dev->dev_private) {
3182                 drm_radeon_private_t *dev_priv = dev->dev_private;
3183                 dev_priv->page_flipping = 0;
3184                 radeon_mem_release(file_priv, dev_priv->gart_heap);
3185                 radeon_mem_release(file_priv, dev_priv->fb_heap);
3186                 radeon_surfaces_release(file_priv, dev_priv);
3187         }
3188 }
3189
3190 void radeon_driver_lastclose(struct drm_device *dev)
3191 {
3192         if (dev->dev_private) {
3193                 drm_radeon_private_t *dev_priv = dev->dev_private;
3194
3195                 if (dev_priv->sarea_priv &&
3196                     dev_priv->sarea_priv->pfCurrentPage != 0)
3197                         radeon_cp_dispatch_flip(dev);
3198         }
3199
3200         radeon_do_release(dev);
3201 }
3202
3203 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3204 {
3205         drm_radeon_private_t *dev_priv = dev->dev_private;
3206         struct drm_radeon_driver_file_fields *radeon_priv;
3207
3208         DRM_DEBUG("\n");
3209         radeon_priv =
3210             (struct drm_radeon_driver_file_fields *)
3211             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3212
3213         if (!radeon_priv)
3214                 return -ENOMEM;
3215
3216         file_priv->driver_priv = radeon_priv;
3217
3218         if (dev_priv)
3219                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3220         else
3221                 radeon_priv->radeon_fb_delta = 0;
3222         return 0;
3223 }
3224
3225 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3226 {
3227         struct drm_radeon_driver_file_fields *radeon_priv =
3228             file_priv->driver_priv;
3229
3230         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3231 }
3232
/* Ioctl dispatch table for the radeon driver.  Entries flagged
 * DRM_MASTER|DRM_ROOT_ONLY are privileged setup/teardown operations;
 * the rest require only an authenticated client (DRM_AUTH). */
struct drm_ioctl_desc radeon_ioctls[] = {
	DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH)
};

/* Number of entries in radeon_ioctls, consumed by the DRM core. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);