/*
 * Source: OSDN Git Service mirror of android-x86/external-libdrm.git,
 * commit 883d1d0e03f0db6d212bb5ca4c27f00161b4841e,
 * path: shared-core/radeon_state.c
 */
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*-
2  *
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
/* Validate a client-supplied GPU address, relocating it by the per-file
 * framebuffer delta if it falls outside the accessible window.
 *
 * Returns 0 when *offset lies (or has been fixed up to lie) inside
 * [fb_location, gart_vm_start + gart_size); EINVAL otherwise.
 *
 * NOTE(review): the single range test assumes the framebuffer and GART
 * apertures are contiguous in the card's address space; if a gap can
 * exist between them, addresses in the gap would pass -- confirm
 * against how fb_location/gart_vm_start are programmed at init time.
 */
static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
                                                    dev_priv,
                                                    drm_file_t * filp_priv,
                                                    u32 * offset)
{
        u32 off = *offset;
        struct drm_radeon_driver_file_fields *radeon_priv;

        /* Already inside the window the client is allowed to touch? */
        if (off >= dev_priv->fb_location &&
            off < (dev_priv->gart_vm_start + dev_priv->gart_size))
                return 0;

        radeon_priv = filp_priv->driver_priv;

        /* Client may have used its own idea of the framebuffer base;
         * translate by the per-open-file delta and re-check. */
        off += radeon_priv->radeon_fb_delta;

        DRM_DEBUG("offset fixed up to 0x%x\n", off);

        /* Still outside the permitted window: reject. */
        if (off < dev_priv->fb_location ||
            off >= (dev_priv->gart_vm_start + dev_priv->gart_size))
                return DRM_ERR(EINVAL);

        *offset = off;

        return 0;
}
66
/* Validate (and fix up in place) any memory offsets embedded in a
 * single state packet of the given id.  "data" points at the packet's
 * payload dwords, already copied from user space.
 *
 * Returns 0 on success; EINVAL for an unknown packet id or for any
 * offset rejected by radeon_check_and_fixup_offset().
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
                                                     dev_priv,
                                                     drm_file_t * filp_priv,
                                                     int id, u32 __user * data)
{
        switch (id) {

        case RADEON_EMIT_PP_MISC:
                /* Depth buffer offset sits at a fixed dword index
                 * inside the PP_MISC payload block. */
                if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
                    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
                        DRM_ERROR("Invalid depth buffer offset\n");
                        return DRM_ERR(EINVAL);
                }
                break;

        case RADEON_EMIT_PP_CNTL:
                /* Colour buffer offset inside the PP_CNTL payload. */
                if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
                    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
                        DRM_ERROR("Invalid colour buffer offset\n");
                        return DRM_ERR(EINVAL);
                }
                break;

        case R200_EMIT_PP_TXOFFSET_0:
        case R200_EMIT_PP_TXOFFSET_1:
        case R200_EMIT_PP_TXOFFSET_2:
        case R200_EMIT_PP_TXOFFSET_3:
        case R200_EMIT_PP_TXOFFSET_4:
        case R200_EMIT_PP_TXOFFSET_5:
                /* R200 texture offset packets carry the offset as
                 * their first payload dword. */
                if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
                                                  &data[0])) {
                        DRM_ERROR("Invalid R200 texture offset\n");
                        return DRM_ERR(EINVAL);
                }
                break;

        case RADEON_EMIT_PP_TXFILTER_0:
        case RADEON_EMIT_PP_TXFILTER_1:
        case RADEON_EMIT_PP_TXFILTER_2:
                /* R100 texture offset sits at a fixed index inside
                 * the TXFILTER payload block. */
                if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
                    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
                        DRM_ERROR("Invalid R100 texture offset\n");
                        return DRM_ERR(EINVAL);
                }
                break;

        case R200_EMIT_PP_CUBIC_OFFSETS_0:
        case R200_EMIT_PP_CUBIC_OFFSETS_1:
        case R200_EMIT_PP_CUBIC_OFFSETS_2:
        case R200_EMIT_PP_CUBIC_OFFSETS_3:
        case R200_EMIT_PP_CUBIC_OFFSETS_4:
        case R200_EMIT_PP_CUBIC_OFFSETS_5:{
                        /* Each cubic-offset packet carries 5 face
                         * offsets (see the packet[] table: len 5). */
                        int i;
                        for (i = 0; i < 5; i++) {
                                if (radeon_check_and_fixup_offset(dev_priv,
                                                                  filp_priv,
                                                                  &data[i])) {
                                        DRM_ERROR
                                            ("Invalid R200 cubic texture offset\n");
                                        return DRM_ERR(EINVAL);
                                }
                        }
                        break;
                }

        case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
        case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
        case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
                        /* R100 variant: likewise 5 face offsets. */
                        int i;
                        for (i = 0; i < 5; i++) {
                                if (radeon_check_and_fixup_offset(dev_priv,
                                                                  filp_priv,
                                                                  &data[i])) {
                                        DRM_ERROR
                                            ("Invalid R100 cubic texture offset\n");
                                        return DRM_ERR(EINVAL);
                                }
                        }
                }
                break;

        case RADEON_EMIT_RB3D_COLORPITCH:
        case RADEON_EMIT_RE_LINE_PATTERN:
        case RADEON_EMIT_SE_LINE_WIDTH:
        case RADEON_EMIT_PP_LUM_MATRIX:
        case RADEON_EMIT_PP_ROT_MATRIX_0:
        case RADEON_EMIT_RB3D_STENCILREFMASK:
        case RADEON_EMIT_SE_VPORT_XSCALE:
        case RADEON_EMIT_SE_CNTL:
        case RADEON_EMIT_SE_CNTL_STATUS:
        case RADEON_EMIT_RE_MISC:
        case RADEON_EMIT_PP_BORDER_COLOR_0:
        case RADEON_EMIT_PP_BORDER_COLOR_1:
        case RADEON_EMIT_PP_BORDER_COLOR_2:
        case RADEON_EMIT_SE_ZBIAS_FACTOR:
        case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
        case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
        case R200_EMIT_PP_TXCBLEND_0:
        case R200_EMIT_PP_TXCBLEND_1:
        case R200_EMIT_PP_TXCBLEND_2:
        case R200_EMIT_PP_TXCBLEND_3:
        case R200_EMIT_PP_TXCBLEND_4:
        case R200_EMIT_PP_TXCBLEND_5:
        case R200_EMIT_PP_TXCBLEND_6:
        case R200_EMIT_PP_TXCBLEND_7:
        case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
        case R200_EMIT_TFACTOR_0:
        case R200_EMIT_VTX_FMT_0:
        case R200_EMIT_VAP_CTL:
        case R200_EMIT_MATRIX_SELECT_0:
        case R200_EMIT_TEX_PROC_CTL_2:
        case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
        case R200_EMIT_PP_TXFILTER_0:
        case R200_EMIT_PP_TXFILTER_1:
        case R200_EMIT_PP_TXFILTER_2:
        case R200_EMIT_PP_TXFILTER_3:
        case R200_EMIT_PP_TXFILTER_4:
        case R200_EMIT_PP_TXFILTER_5:
        case R200_EMIT_VTE_CNTL:
        case R200_EMIT_OUTPUT_VTX_COMP_SEL:
        case R200_EMIT_PP_TAM_DEBUG3:
        case R200_EMIT_PP_CNTL_X:
        case R200_EMIT_RB3D_DEPTHXY_OFFSET:
        case R200_EMIT_RE_AUX_SCISSOR_CNTL:
        case R200_EMIT_RE_SCISSOR_TL_0:
        case R200_EMIT_RE_SCISSOR_TL_1:
        case R200_EMIT_RE_SCISSOR_TL_2:
        case R200_EMIT_SE_VAP_CNTL_STATUS:
        case R200_EMIT_SE_VTX_STATE_CNTL:
        case R200_EMIT_RE_POINTSIZE:
        case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
        case R200_EMIT_PP_CUBIC_FACES_0:
        case R200_EMIT_PP_CUBIC_FACES_1:
        case R200_EMIT_PP_CUBIC_FACES_2:
        case R200_EMIT_PP_CUBIC_FACES_3:
        case R200_EMIT_PP_CUBIC_FACES_4:
        case R200_EMIT_PP_CUBIC_FACES_5:
        case RADEON_EMIT_PP_TEX_SIZE_0:
        case RADEON_EMIT_PP_TEX_SIZE_1:
        case RADEON_EMIT_PP_TEX_SIZE_2:
        case R200_EMIT_RB3D_BLENDCOLOR:
        case R200_EMIT_TCL_POINT_SPRITE_CNTL:
        case RADEON_EMIT_PP_CUBIC_FACES_0:
        case RADEON_EMIT_PP_CUBIC_FACES_1:
        case RADEON_EMIT_PP_CUBIC_FACES_2:
                /* These packets don't contain memory offsets */
                break;

        default:
                DRM_ERROR("Unknown state packet ID %d\n", id);
                return DRM_ERR(EINVAL);
        }

        return 0;
}
222
/* Validate a type-3 CP packet at the head of cmdbuf->buf and fix up
 * any pitch/offset dwords it carries.
 *
 * *cmdsz is set to the total packet size in dwords: the header's count
 * field holds (payload dwords - 1), so total = 2 + count.  Offsets in
 * the GMC dwords are stored in 1KB units (hence the << 10 / >> 10) and
 * are range-checked via radeon_check_and_fixup_offset().
 *
 * Returns 0 on success; EINVAL for a non-type-3 header, a packet that
 * claims to be larger than the buffer supplied, or an illegal offset.
 */
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
                                                     dev_priv,
                                                     drm_file_t * filp_priv,
                                                     drm_radeon_cmd_buffer_t *
                                                     cmdbuf,
                                                     unsigned int *cmdsz)
{
        u32 *cmd = (u32 *) cmdbuf->buf;

        *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

        /* Top two bits of the header select the packet type. */
        if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
                DRM_ERROR("Not a type 3 packet\n");
                return DRM_ERR(EINVAL);
        }

        /* The claimed size must fit in the buffer we were handed. */
        if (4 * *cmdsz > cmdbuf->bufsz) {
                DRM_ERROR("Packet size larger than size of data provided\n");
                return DRM_ERR(EINVAL);
        }

        /* Check client state and fix it up if necessary */
        if (cmd[0] & 0x8000) {  /* MSB of opcode: next DWORD GUI_CNTL */
                u32 offset;

                /* First pitch/offset dword, present if either the src
                 * or dst pitch-offset control bit is set. */
                if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
                              | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
                        offset = cmd[2] << 10;
                        if (radeon_check_and_fixup_offset
                            (dev_priv, filp_priv, &offset)) {
                                DRM_ERROR("Invalid first packet offset\n");
                                return DRM_ERR(EINVAL);
                        }
                        /* Preserve the pitch bits, rewrite the offset. */
                        cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
                }

                /* A second pitch/offset dword follows only when both
                 * src and dst controls are set. */
                if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
                    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
                        offset = cmd[3] << 10;
                        if (radeon_check_and_fixup_offset
                            (dev_priv, filp_priv, &offset)) {
                                DRM_ERROR("Invalid second packet offset\n");
                                return DRM_ERR(EINVAL);
                        }
                        cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
                }
        }

        return 0;
}
273
274 /* ================================================================
275  * CP hardware state programming functions
276  */
277
/* Program the hardware clip rectangle to the given box.  The box's
 * x2/y2 are exclusive, hence the -1 when writing the second register
 * (which presumably takes inclusive coordinates -- matches the
 * register name pair TOP_LEFT / WIDTH_HEIGHT usage here).
 */
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
                                             drm_clip_rect_t * box)
{
        RING_LOCALS;

        DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
                  box->x1, box->y1, box->x2, box->y2);

        BEGIN_RING(4);
        OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
        OUT_RING((box->y1 << 16) | box->x1);
        OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
        /* Exclusive -> inclusive conversion. */
        OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
        ADVANCE_RING();
}
293
294 /* Emit 1.1 state
295  */
296 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
297                              drm_file_t * filp_priv,
298                              drm_radeon_context_regs_t * ctx,
299                              drm_radeon_texture_regs_t * tex,
300                              unsigned int dirty)
301 {
302         RING_LOCALS;
303         DRM_DEBUG("dirty=0x%08x\n", dirty);
304
305         if (dirty & RADEON_UPLOAD_CONTEXT) {
306                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
307                                                   &ctx->rb3d_depthoffset)) {
308                         DRM_ERROR("Invalid depth buffer offset\n");
309                         return DRM_ERR(EINVAL);
310                 }
311
312                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
313                                                   &ctx->rb3d_coloroffset)) {
314                         DRM_ERROR("Invalid depth buffer offset\n");
315                         return DRM_ERR(EINVAL);
316                 }
317
318                 BEGIN_RING(14);
319                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
320                 OUT_RING(ctx->pp_misc);
321                 OUT_RING(ctx->pp_fog_color);
322                 OUT_RING(ctx->re_solid_color);
323                 OUT_RING(ctx->rb3d_blendcntl);
324                 OUT_RING(ctx->rb3d_depthoffset);
325                 OUT_RING(ctx->rb3d_depthpitch);
326                 OUT_RING(ctx->rb3d_zstencilcntl);
327                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
328                 OUT_RING(ctx->pp_cntl);
329                 OUT_RING(ctx->rb3d_cntl);
330                 OUT_RING(ctx->rb3d_coloroffset);
331                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
332                 OUT_RING(ctx->rb3d_colorpitch);
333                 ADVANCE_RING();
334         }
335
336         if (dirty & RADEON_UPLOAD_VERTFMT) {
337                 BEGIN_RING(2);
338                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
339                 OUT_RING(ctx->se_coord_fmt);
340                 ADVANCE_RING();
341         }
342
343         if (dirty & RADEON_UPLOAD_LINE) {
344                 BEGIN_RING(5);
345                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
346                 OUT_RING(ctx->re_line_pattern);
347                 OUT_RING(ctx->re_line_state);
348                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
349                 OUT_RING(ctx->se_line_width);
350                 ADVANCE_RING();
351         }
352
353         if (dirty & RADEON_UPLOAD_BUMPMAP) {
354                 BEGIN_RING(5);
355                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
356                 OUT_RING(ctx->pp_lum_matrix);
357                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
358                 OUT_RING(ctx->pp_rot_matrix_0);
359                 OUT_RING(ctx->pp_rot_matrix_1);
360                 ADVANCE_RING();
361         }
362
363         if (dirty & RADEON_UPLOAD_MASKS) {
364                 BEGIN_RING(4);
365                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
366                 OUT_RING(ctx->rb3d_stencilrefmask);
367                 OUT_RING(ctx->rb3d_ropcntl);
368                 OUT_RING(ctx->rb3d_planemask);
369                 ADVANCE_RING();
370         }
371
372         if (dirty & RADEON_UPLOAD_VIEWPORT) {
373                 BEGIN_RING(7);
374                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
375                 OUT_RING(ctx->se_vport_xscale);
376                 OUT_RING(ctx->se_vport_xoffset);
377                 OUT_RING(ctx->se_vport_yscale);
378                 OUT_RING(ctx->se_vport_yoffset);
379                 OUT_RING(ctx->se_vport_zscale);
380                 OUT_RING(ctx->se_vport_zoffset);
381                 ADVANCE_RING();
382         }
383
384         if (dirty & RADEON_UPLOAD_SETUP) {
385                 BEGIN_RING(4);
386                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
387                 OUT_RING(ctx->se_cntl);
388                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
389                 OUT_RING(ctx->se_cntl_status);
390                 ADVANCE_RING();
391         }
392
393         if (dirty & RADEON_UPLOAD_MISC) {
394                 BEGIN_RING(2);
395                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
396                 OUT_RING(ctx->re_misc);
397                 ADVANCE_RING();
398         }
399
400         if (dirty & RADEON_UPLOAD_TEX0) {
401                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
402                                                   &tex[0].pp_txoffset)) {
403                         DRM_ERROR("Invalid texture offset for unit 0\n");
404                         return DRM_ERR(EINVAL);
405                 }
406
407                 BEGIN_RING(9);
408                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
409                 OUT_RING(tex[0].pp_txfilter);
410                 OUT_RING(tex[0].pp_txformat);
411                 OUT_RING(tex[0].pp_txoffset);
412                 OUT_RING(tex[0].pp_txcblend);
413                 OUT_RING(tex[0].pp_txablend);
414                 OUT_RING(tex[0].pp_tfactor);
415                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
416                 OUT_RING(tex[0].pp_border_color);
417                 ADVANCE_RING();
418         }
419
420         if (dirty & RADEON_UPLOAD_TEX1) {
421                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
422                                                   &tex[1].pp_txoffset)) {
423                         DRM_ERROR("Invalid texture offset for unit 1\n");
424                         return DRM_ERR(EINVAL);
425                 }
426
427                 BEGIN_RING(9);
428                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
429                 OUT_RING(tex[1].pp_txfilter);
430                 OUT_RING(tex[1].pp_txformat);
431                 OUT_RING(tex[1].pp_txoffset);
432                 OUT_RING(tex[1].pp_txcblend);
433                 OUT_RING(tex[1].pp_txablend);
434                 OUT_RING(tex[1].pp_tfactor);
435                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
436                 OUT_RING(tex[1].pp_border_color);
437                 ADVANCE_RING();
438         }
439
440         if (dirty & RADEON_UPLOAD_TEX2) {
441                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
442                                                   &tex[2].pp_txoffset)) {
443                         DRM_ERROR("Invalid texture offset for unit 2\n");
444                         return DRM_ERR(EINVAL);
445                 }
446
447                 BEGIN_RING(9);
448                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
449                 OUT_RING(tex[2].pp_txfilter);
450                 OUT_RING(tex[2].pp_txformat);
451                 OUT_RING(tex[2].pp_txoffset);
452                 OUT_RING(tex[2].pp_txcblend);
453                 OUT_RING(tex[2].pp_txablend);
454                 OUT_RING(tex[2].pp_tfactor);
455                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
456                 OUT_RING(tex[2].pp_border_color);
457                 ADVANCE_RING();
458         }
459
460         return 0;
461 }
462
463 /* Emit 1.2 state
464  */
465 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
466                               drm_file_t * filp_priv,
467                               drm_radeon_state_t * state)
468 {
469         RING_LOCALS;
470
471         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
472                 BEGIN_RING(3);
473                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
474                 OUT_RING(state->context2.se_zbias_factor);
475                 OUT_RING(state->context2.se_zbias_constant);
476                 ADVANCE_RING();
477         }
478
479         return radeon_emit_state(dev_priv, filp_priv, &state->context,
480                                  state->tex, state->dirty);
481 }
482
483 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
484  * 1.3 cmdbuffers allow all previous state to be updated as well as
485  * the tcl scalar and vector areas.
486  */
487 static struct {
488         int start;
489         int len;
490         const char *name;
491 } packet[RADEON_MAX_STATE_PACKETS] = {
492         {
493         RADEON_PP_MISC, 7, "RADEON_PP_MISC"}, {
494         RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"}, {
495         RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"}, {
496         RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"}, {
497         RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"}, {
498         RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"}, {
499         RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"}, {
500         RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"}, {
501         RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"}, {
502         RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"}, {
503         RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"}, {
504         RADEON_RE_MISC, 1, "RADEON_RE_MISC"}, {
505         RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"}, {
506         RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"}, {
507         RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"}, {
508         RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"}, {
509         RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"}, {
510         RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"}, {
511         RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"}, {
512         RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"}, {
513         RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
514                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"}, {
515         R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"}, {
516         R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"}, {
517         R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"}, {
518         R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"}, {
519         R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"}, {
520         R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"}, {
521         R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"}, {
522         R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"}, {
523         R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
524         {
525         R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"}, {
526         R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"}, {
527         R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"}, {
528         R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"}, {
529         R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"}, {
530         R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
531         {
532         R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"}, {
533         R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"}, {
534         R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"}, {
535         R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"}, {
536         R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"}, {
537         R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"}, {
538         R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"}, {
539         R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"}, {
540         R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"}, {
541         R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"}, {
542         R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"}, {
543         R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"}, {
544         R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"}, {
545         R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
546         {
547         R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"}, {
548         R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"}, {
549         R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"}, {
550         R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"}, {
551         R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"}, {
552         R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"}, {
553         R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"}, {
554         R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"}, {
555         R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"}, {
556         R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"}, {
557         R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
558                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"}, {
559         R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},     /* 61 */
560         {
561         R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},     /* 62 */
562         {
563         R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"}, {
564         R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"}, {
565         R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"}, {
566         R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"}, {
567         R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"}, {
568         R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"}, {
569         R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"}, {
570         R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"}, {
571         R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"}, {
572         R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"}, {
573         RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, {
574         RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, {
575         RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, {
576         R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, {
577         R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
578         {
579         RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"}, {
580         RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"}, {
581         RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"}, {
582         RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"}, {
583         RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"}, {
584         RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
585 };
586
587 /* ================================================================
588  * Performance monitoring functions
589  */
590
/* Fill a small w x h rectangle at (x, y) -- relative to the first
 * cliprect -- with the given 8-bit r/g/b colour via a 2D solid-paint
 * blit.  Used by the performance-box overlay below.
 */
static void radeon_clear_box(drm_radeon_private_t * dev_priv,
                             int x, int y, int w, int h, int r, int g, int b)
{
        u32 color;
        RING_LOCALS;

        /* Offset into the first clip rectangle. */
        x += dev_priv->sarea_priv->boxes[0].x1;
        y += dev_priv->sarea_priv->boxes[0].y1;

        /* Pack the colour to match the framebuffer pixel format. */
        switch (dev_priv->color_fmt) {
        case RADEON_COLOR_FORMAT_RGB565:
                color = (((r & 0xf8) << 8) |
                         ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
                break;
        case RADEON_COLOR_FORMAT_ARGB8888:
        default:
                color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
                break;
        }

        /* Wait for 3D rendering to finish before the 2D blit, and
         * open all colour channels for writing. */
        BEGIN_RING(4);
        RADEON_WAIT_UNTIL_3D_IDLE();
        OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
        OUT_RING(0xffffffff);
        ADVANCE_RING();

        BEGIN_RING(6);

        OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
        OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                 RADEON_GMC_BRUSH_SOLID_COLOR |
                 (dev_priv->color_fmt << 8) |
                 RADEON_GMC_SRC_DATATYPE_COLOR |
                 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

        /* Select front or back buffer depending on the page-flip
         * state -- presumably so the overlay lands in the buffer
         * being displayed; confirm against the flip logic. */
        if (dev_priv->page_flipping && dev_priv->current_page == 1) {
                OUT_RING(dev_priv->front_pitch_offset);
        } else {
                OUT_RING(dev_priv->back_pitch_offset);
        }

        OUT_RING(color);

        OUT_RING((x << 16) | y);
        OUT_RING((w << 16) | h);

        ADVANCE_RING();
}
639
640 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
641 {
642         /* Collapse various things into a wait flag -- trying to
643          * guess if userspase slept -- better just to have them tell us.
644          */
645         if (dev_priv->stats.last_frame_reads > 1 ||
646             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
647                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
648         }
649
650         if (dev_priv->stats.freelist_loops) {
651                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
652         }
653
654         /* Purple box for page flipping
655          */
656         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
657                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
658
659         /* Red box if we have to wait for idle at any point
660          */
661         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
662                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
663
664         /* Blue box: lost context?
665          */
666
667         /* Yellow box for texture swaps
668          */
669         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
670                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
671
672         /* Green box if hardware never idles (as far as we can tell)
673          */
674         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
675                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
676
677         /* Draw bars indicating number of buffers allocated
678          * (not a great measure, easily confused)
679          */
680         if (dev_priv->stats.requested_bufs) {
681                 if (dev_priv->stats.requested_bufs > 100)
682                         dev_priv->stats.requested_bufs = 100;
683
684                 radeon_clear_box(dev_priv, 4, 16,
685                                  dev_priv->stats.requested_bufs, 4,
686                                  196, 128, 128);
687         }
688
689         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
690
691 }
692
693 /* ================================================================
694  * CP command dispatch functions
695  */
696
/* Dispatch a CLEAR ioctl to the CP ring.  Clears any combination of the
 * front/back color buffers (via 2D solid fills), and the depth/stencil
 * buffer either via a hyper-z fast clear (when requested and the chip
 * supports it) or by rendering a quad into the depth/stencil planes.
 * Everything is clipped to the boxes in the SAREA cliprect list.
 */
static void radeon_cp_dispatch_clear(drm_device_t * dev,
                                     drm_radeon_clear_t * clear,
                                     drm_radeon_clear_rect_t * depth_boxes)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
        int nbox = sarea_priv->nbox;
        drm_clip_rect_t *pbox = sarea_priv->boxes;
        unsigned int flags = clear->flags;
        u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
        int i;
        RING_LOCALS;
        DRM_DEBUG("flags = 0x%x\n", flags);

        dev_priv->stats.clears++;

        /* If we're page-flipped, the client's notion of front and back is
         * reversed relative to the hardware - swap the two flags.
         */
        if (dev_priv->page_flipping && dev_priv->current_page == 1) {
                unsigned int tmp = flags;

                flags &= ~(RADEON_FRONT | RADEON_BACK);
                if (tmp & RADEON_FRONT)
                        flags |= RADEON_BACK;
                if (tmp & RADEON_BACK)
                        flags |= RADEON_FRONT;
        }

        if (flags & (RADEON_FRONT | RADEON_BACK)) {

                BEGIN_RING(4);

                /* Ensure the 3D stream is idle before doing a
                 * 2D fill to clear the front or back buffer.
                 */
                RADEON_WAIT_UNTIL_3D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
                OUT_RING(clear->color_mask);

                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* One solid-color fill per cliprect, per requested buffer. */
                for (i = 0; i < nbox; i++) {
                        int x = pbox[i].x1;
                        int y = pbox[i].y1;
                        int w = pbox[i].x2 - x;
                        int h = pbox[i].y2 - y;

                        DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
                                  x, y, w, h, flags);

                        if (flags & RADEON_FRONT) {
                                BEGIN_RING(6);

                                OUT_RING(CP_PACKET3
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         (dev_priv->
                                          color_fmt << 8) |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_ROP3_P |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->front_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                                ADVANCE_RING();
                        }

                        if (flags & RADEON_BACK) {
                                BEGIN_RING(6);

                                OUT_RING(CP_PACKET3
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         (dev_priv->
                                          color_fmt << 8) |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_ROP3_P |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->back_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                                ADVANCE_RING();
                        }
                }
        }

        /* hyper z clear */
        /* no docs available, based on reverse engeneering by Stephane Marchesin */
        if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {

                int i;
                /* Depth pixels per scanline: pitch is in bytes, 2 bytes per
                 * pixel for 16-bit Z, 4 otherwise.
                 */
                int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z?
                        (dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);

                u32 clearmask;

                u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
                        ((clear->depth_mask & 0xff) << 24);

                /* Make sure we restore the 3D state next time.
                 * we haven't touched any "normal" state - still need this?
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
                /* FIXME : reverse engineer that for Rx00 cards */
                /* FIXME : the mask supposedly contains low-res z values. So can't set
                   just to the max (0xff? or actually 0x3fff?), need to take z clear
                   value into account? */
                /* pattern seems to work for r100, though get slight
                   rendering errors with glxgears. If hierz is not enabled for r100,
                   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
                   other ones are ignored, and the same clear mask can be used. That's
                   very different behaviour than R200 which needs different clear mask
                   and different number of tiles to clear if hierz is enabled or not !?!
                */
                        clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
                }
                else {
                /* clear mask : chooses the clearing pattern.
                   rv250: could be used to clear only parts of macrotiles
                   (but that would get really complicated...)?
                   bit 0 and 1 (either or both of them ?!?!) are used to
                   not clear tile (or maybe one of the bits indicates if the tile is
                   compressed or not), bit 2 and 3 to not clear tile 1,...,.
                   Pattern is as follows:
                        | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
                   bits -------------------------------------------------
                        | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
                   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
                   covers 256 pixels ?!?
                */
                        clearmask = 0x0;
                }

                BEGIN_RING( 8 );
                RADEON_WAIT_UNTIL_2D_IDLE();
                OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
                        tempRB3D_DEPTHCLEARVALUE);
                /* what offset is this exactly ? */
                OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
                /* need ctlstat, otherwise get some strange black flickering */
                OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
                ADVANCE_RING();

                /* Per cliprect, emit one CLEAR_ZMASK packet per row of tiles.
                 * Tile geometry differs per chip family - see each branch.
                 */
                for (i = 0; i < nbox; i++) {
                        int tileoffset, nrtilesx, nrtilesy, j;
                        /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
                        if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
                                /* FIXME : figure this out for r200 (when hierz is enabled). Or
                                   maybe r200 actually doesn't need to put the low-res z value into
                                   the tile cache like r100, but just needs to clear the hi-level z-buffer?
                                   Works for R100, both with hierz and without.
                                   R100 seems to operate on 2x1 8x8 tiles, but...
                                   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
                                   problematic with resolutions which are not 64 pix aligned? */
                                tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
                                nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING( 4 );
                                        OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
                                        /* first tile */
                                        OUT_RING( tileoffset * 8 );
                                        /* the number of tiles to clear */
                                        OUT_RING( nrtilesx + 4 );
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING( clearmask );
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        }
                        else if (dev_priv->microcode_version==UCODE_R200) {
                                /* works for rv250. */
                                /* find first macro tile (8x2 4x4 z-pixels on rv250) */
                                tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
                                nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
                                nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING( 4 );
                                        OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
                                        /* first tile */
                                        /* judging by the first tile offset needed, could possibly
                                           directly address/clear 4x4 tiles instead of 8x2 * 4x4
                                           macro tiles, though would still need clear mask for
                                           right/bottom if truely 4x4 granularity is desired ? */
                                        OUT_RING( tileoffset * 16 );
                                        /* the number of tiles to clear */
                                        OUT_RING( nrtilesx + 1 );
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING( clearmask );
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 5;
                                }
                        }
                        else { /* rv 100 */
                                /* rv100 might not need 64 pix alignment, who knows */
                                /* offsets are, hmm, weird */
                                tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
                                nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING( 4 );
                                        OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
                                        OUT_RING( tileoffset * 128 );
                                        /* the number of tiles to clear */
                                        OUT_RING( nrtilesx + 4 );
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING( clearmask );
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        }
                }

                /* TODO don't always clear all hi-level z tiles */
                if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
                        && (flags & RADEON_USE_HIERZ))
                /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
                /* FIXME : the mask supposedly contains low-res z values. So can't set
                   just to the max (0xff? or actually 0x3fff?), need to take z clear
                   value into account? */
                {
                        BEGIN_RING( 4 );
                        OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
                        OUT_RING( 0x0 ); /* First tile */
                        OUT_RING( 0x3cc0 );
                        OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
                        ADVANCE_RING();
                }
        }

        /* We have to clear the depth and/or stencil buffers by
         * rendering a quad into just those buffers.  Thus, we have to
         * make sure the 3D engine is configured correctly.
         */
        else if ((dev_priv->microcode_version == UCODE_R200) &&
                (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

                int tempPP_CNTL;
                int tempRE_CNTL;
                int tempRB3D_CNTL;
                int tempRB3D_ZSTENCILCNTL;
                int tempRB3D_STENCILREFMASK;
                int tempRB3D_PLANEMASK;
                int tempSE_CNTL;
                int tempSE_VTE_CNTL;
                int tempSE_VTX_FMT_0;
                int tempSE_VTX_FMT_1;
                int tempSE_VAP_CNTL;
                int tempRE_AUX_SCISSOR_CNTL;

                tempPP_CNTL = 0;
                tempRE_CNTL = 0;

                tempRB3D_CNTL = depth_clear->rb3d_cntl;

                tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
                tempRB3D_STENCILREFMASK = 0x0;

                tempSE_CNTL = depth_clear->se_cntl;

                /* Disable TCL */

                tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
                                          (0x9 <<
                                           SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

                tempRB3D_PLANEMASK = 0x0;

                tempRE_AUX_SCISSOR_CNTL = 0x0;

                tempSE_VTE_CNTL =
                    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

                /* Vertex format (X, Y, Z, W) */
                tempSE_VTX_FMT_0 =
                    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
                    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
                tempSE_VTX_FMT_1 = 0x0;

                /*
                 * Depth buffer specific enables
                 */
                if (flags & RADEON_DEPTH) {
                        /* Enable depth buffer */
                        tempRB3D_CNTL |= RADEON_Z_ENABLE;
                } else {
                        /* Disable depth buffer */
                        tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
                }

                /*
                 * Stencil buffer specific enables
                 */
                if (flags & RADEON_STENCIL) {
                        tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = clear->depth_mask;
                } else {
                        tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                                RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING(26);
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
                OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
                OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
                             tempRB3D_STENCILREFMASK);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
                OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
                OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
                OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
                OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
                OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
                OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* Draw one depth-only triangle per cliprect; 0x3f800000 is
                 * IEEE-754 1.0f for the W coordinate.
                 */
                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        BEGIN_RING(14);
                        OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        ADVANCE_RING();
                }
        } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
                /* Pre-R200 (R100/RV100/RV200) depth/stencil clear path. */

                int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

                rb3d_cntl = depth_clear->rb3d_cntl;

                if (flags & RADEON_DEPTH) {
                        rb3d_cntl |= RADEON_Z_ENABLE;
                } else {
                        rb3d_cntl &= ~RADEON_Z_ENABLE;
                }

                if (flags & RADEON_STENCIL) {
                        rb3d_cntl |= RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
                } else {
                        rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                                RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING(13);
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
                OUT_RING(0x00000000);
                OUT_RING(rb3d_cntl);

                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
                OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        BEGIN_RING(15);

                        OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
                        OUT_RING(RADEON_VTX_Z_PRESENT |
                                 RADEON_VTX_PKCOLOR_PRESENT);
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  RADEON_MAOS_ENABLE |
                                  RADEON_VTX_FMT_RADEON_MODE |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        ADVANCE_RING();
                }
        }

        /* Increment the clear counter.  The client-side 3D driver must
         * wait on this value before performing the clear ioctl.  We
         * need this because the card's so damned fast...
         */
        dev_priv->sarea_priv->last_clear++;

        BEGIN_RING(4);

        RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
        RADEON_WAIT_UNTIL_IDLE();

        ADVANCE_RING();
}
1166
/* Dispatch a swap-buffers request: blit the back buffer to the front
 * buffer (one BITBLT_MULTI packet per SAREA cliprect), then bump and
 * emit the frame-age counter that clients throttle against.
 */
static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        int nbox = sarea_priv->nbox;
        drm_clip_rect_t *pbox = sarea_priv->boxes;
        int i;
        RING_LOCALS;
        DRM_DEBUG("\n");

        /* Do some trivial performance monitoring...
         */
        if (dev_priv->do_boxes)
                radeon_cp_performance_boxes(dev_priv);

        /* Wait for the 3D stream to idle before dispatching the bitblt.
         * This will prevent data corruption between the two streams.
         */
        BEGIN_RING(2);

        RADEON_WAIT_UNTIL_3D_IDLE();

        ADVANCE_RING();

        for (i = 0; i < nbox; i++) {
                int x = pbox[i].x1;
                int y = pbox[i].y1;
                int w = pbox[i].x2 - x;
                int h = pbox[i].y2 - y;

                DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

                BEGIN_RING(7);

                OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
                OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                         RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                         RADEON_GMC_BRUSH_NONE |
                         (dev_priv->color_fmt << 8) |
                         RADEON_GMC_SRC_DATATYPE_COLOR |
                         RADEON_ROP3_S |
                         RADEON_DP_SRC_SOURCE_MEMORY |
                         RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

                /* Make this work even if front & back are flipped:
                 */
                if (dev_priv->current_page == 0) {
                        OUT_RING(dev_priv->back_pitch_offset);
                        OUT_RING(dev_priv->front_pitch_offset);
                } else {
                        OUT_RING(dev_priv->front_pitch_offset);
                        OUT_RING(dev_priv->back_pitch_offset);
                }

                /* Source and destination coordinates are the same: the blit
                 * copies the rect in place between the two surfaces.
                 */
                OUT_RING((x << 16) | y);
                OUT_RING((x << 16) | y);
                OUT_RING((w << 16) | h);

                ADVANCE_RING();
        }

        /* Increment the frame counter.  The client-side 3D driver must
         * throttle the framerate by waiting for this value before
         * performing the swapbuffer ioctl.
         */
        dev_priv->sarea_priv->last_frame++;

        BEGIN_RING(4);

        RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
        RADEON_WAIT_UNTIL_2D_IDLE();

        ADVANCE_RING();
}
1241
/* Dispatch a page flip: repoint both CRTCs at whichever of the
 * front/back buffers is not currently displayed, toggle
 * dev_priv->current_page, and emit the frame-age counter.
 */
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
        /* Offset of the buffer we will flip TO (the one not displayed). */
        int offset = (dev_priv->current_page == 1)
            ? dev_priv->front_offset : dev_priv->back_offset;
        RING_LOCALS;
        DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
                  __FUNCTION__,
                  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

        /* Do some trivial performance monitoring...
         */
        if (dev_priv->do_boxes) {
                dev_priv->stats.boxes |= RADEON_BOX_FLIP;
                radeon_cp_performance_boxes(dev_priv);
        }

        /* Update the frame offsets for both CRTCs
         */
        BEGIN_RING(6);

        RADEON_WAIT_UNTIL_3D_IDLE();
        /* NOTE(review): (dev_priv->color_fmt - 2) appears to yield bytes per
         * pixel for the x term of the scanout origin - confirm against the
         * color_fmt encoding.  The & ~7 keeps the offset 8-byte aligned.
         */
        OUT_RING_REG(RADEON_CRTC_OFFSET,
                     ((sarea->frame.y * dev_priv->front_pitch +
                       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
                     + offset);
        OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
                     + offset);

        ADVANCE_RING();

        /* Increment the frame counter.  The client-side 3D driver must
         * throttle the framerate by waiting for this value before
         * performing the swapbuffer ioctl.
         */
        dev_priv->sarea_priv->last_frame++;
        dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
            1 - dev_priv->current_page;

        BEGIN_RING(2);

        RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

        ADVANCE_RING();
}
1288
1289 static int bad_prim_vertex_nr(int primitive, int nr)
1290 {
1291         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1292         case RADEON_PRIM_TYPE_NONE:
1293         case RADEON_PRIM_TYPE_POINT:
1294                 return nr < 1;
1295         case RADEON_PRIM_TYPE_LINE:
1296                 return (nr & 1) || nr == 0;
1297         case RADEON_PRIM_TYPE_LINE_STRIP:
1298                 return nr < 2;
1299         case RADEON_PRIM_TYPE_TRI_LIST:
1300         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1301         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1302         case RADEON_PRIM_TYPE_RECT_LIST:
1303                 return nr % 3 || nr == 0;
1304         case RADEON_PRIM_TYPE_TRI_FAN:
1305         case RADEON_PRIM_TYPE_TRI_STRIP:
1306                 return nr < 3;
1307         default:
1308                 return 1;
1309         }
1310 }
1311
/* Description of one TCL primitive to be dispatched from a vertex or
 * index buffer (see radeon_cp_dispatch_vertex/indices below).
 */
typedef struct {
        unsigned int start;     /* byte offset of first vertex/index data in the buffer */
        unsigned int finish;    /* byte offset just past the last index (index path) */
        unsigned int prim;      /* hardware primitive type plus flag bits */
        unsigned int numverts;  /* number of vertices to render */
        unsigned int offset;    /* vertex array offset, relative to GART buffer base (index path) */
        unsigned int vc_format; /* hardware vertex format word */
} drm_radeon_tcl_prim_t;
1320
/* Emit the rendering commands for one vertex-buffer primitive.  The
 * primitive is re-emitted once per SAREA cliprect; note the do/while
 * means it is emitted at least once even when nbox == 0.
 */
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
                                      drm_buf_t * buf,
                                      drm_radeon_tcl_prim_t * prim)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        /* GPU address of the vertex data: GART buffer base + buffer offset
         * + start of this primitive within the buffer.
         */
        int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
        int numverts = (int)prim->numverts;
        int nbox = sarea_priv->nbox;
        int i = 0;
        RING_LOCALS;

        DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
                  prim->prim,
                  prim->vc_format, prim->start, prim->finish, prim->numverts);

        /* Reject primitive/vertex-count combinations the hardware can't do. */
        if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
                DRM_ERROR("bad prim %x numverts %d\n",
                          prim->prim, prim->numverts);
                return;
        }

        do {
                /* Emit the next cliprect */
                if (i < nbox) {
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
                }

                /* Emit the vertex buffer rendering commands */
                BEGIN_RING(5);

                OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
                OUT_RING(offset);
                OUT_RING(numverts);
                OUT_RING(prim->vc_format);
                OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
                         RADEON_COLOR_ORDER_RGBA |
                         RADEON_VTX_FMT_RADEON_MODE |
                         (numverts << RADEON_NUM_VERTICES_SHIFT));

                ADVANCE_RING();

                i++;
        } while (i < nbox);
}
1366
/* Mark a DMA buffer as pending reuse: stamp it with a fresh dispatch
 * age and emit that age to the ring (via RADEON_DISPATCH_AGE), so the
 * buffer can be reclaimed once the CP has processed past it.
 */
static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
        RING_LOCALS;

        buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

        /* Emit the vertex buffer age */
        BEGIN_RING(2);
        RADEON_DISPATCH_AGE(buf_priv->age);
        ADVANCE_RING();

        buf->pending = 1;
        buf->used = 0;
}
1383
/* Fire the [start, end) byte range of a client buffer as an indirect
 * CP buffer (IB).  Does nothing when the range is empty.
 */
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
                                        drm_buf_t * buf, int start, int end)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        RING_LOCALS;
        DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

        if (start != end) {
                /* GPU address of the command data within the GART aperture. */
                int offset = (dev_priv->gart_buffers_offset
                              + buf->offset + start);
                /* Length in dwords, rounded up. */
                int dwords = (end - start + 3) / sizeof(u32);

                /* Indirect buffer data must be an even number of
                 * dwords, so if we've been given an odd number we must
                 * pad the data with a Type-2 CP packet.
                 */
                if (dwords & 1) {
                        /* NOTE(review): this writes one dword past `end` in
                         * the CPU mapping - presumably the buffer always has
                         * room for the pad; confirm against buffer sizing.
                         */
                        u32 *data = (u32 *)
                            ((char *)dev->agp_buffer_map->handle
                             + buf->offset + start);
                        data[dwords++] = RADEON_CP_PACKET2;
                }

                /* Fire off the indirect buffer */
                BEGIN_RING(3);

                OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
                OUT_RING(offset);
                OUT_RING(dwords);

                ADVANCE_RING();
        }
}
1417
/* Dispatch an indexed primitive: patch a 3D_RNDR_GEN_INDX_PRIM packet
 * header into the space reserved at the front of the element buffer
 * (RADEON_INDEX_PRIM_OFFSET bytes before the indices), then fire that
 * buffer as an indirect buffer once per cliprect.
 */
static void radeon_cp_dispatch_indices(drm_device_t * dev,
                                       drm_buf_t * elt_buf,
                                       drm_radeon_tcl_prim_t * prim)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        int offset = dev_priv->gart_buffers_offset + prim->offset;
        u32 *data;
        int dwords;
        int i = 0;
        /* Indices begin after the reserved packet-header space. */
        int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
        /* Number of 16-bit indices between start and finish. */
        int count = (prim->finish - start) / sizeof(u16);
        int nbox = sarea_priv->nbox;

        DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
                  prim->prim,
                  prim->vc_format,
                  prim->start, prim->finish, prim->offset, prim->numverts);

        if (bad_prim_vertex_nr(prim->prim, count)) {
                DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
                return;
        }

        /* The range must be non-empty and the header 8-byte aligned. */
        if (start >= prim->finish || (prim->start & 0x7)) {
                DRM_ERROR("buffer prim %d\n", prim->prim);
                return;
        }

        /* Total packet length (header + indices) in dwords, rounded up. */
        dwords = (prim->finish - prim->start + 3) / sizeof(u32);

        /* CPU view of the packet header inside the element buffer. */
        data = (u32 *) ((char *)dev->agp_buffer_map->handle +
                        elt_buf->offset + prim->start);

        data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
        data[1] = offset;
        data[2] = prim->numverts;
        data[3] = prim->vc_format;
        data[4] = (prim->prim |
                   RADEON_PRIM_WALK_IND |
                   RADEON_COLOR_ORDER_RGBA |
                   RADEON_VTX_FMT_RADEON_MODE |
                   (count << RADEON_NUM_VERTICES_SHIFT));

        /* Replay the same indirect buffer for each cliprect; with no
         * cliprects (nbox == 0) it still runs exactly once.
         */
        do {
                if (i < nbox)
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                radeon_cp_dispatch_indirect(dev, elt_buf,
                                            prim->start, prim->finish);

                i++;
        } while (i < nbox);

}
1473
/* Maximum image payload per DMA buffer once the 8-dword HOSTDATA_BLT
 * packet header has been reserved.
 */
#define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))

/* Upload a texture image from user space into video memory using CP
 * CNTL_HOSTDATA_BLT packets built inside DMA buffers.  Images larger
 * than one buffer are uploaded in multiple passes; *image is updated
 * after each pass so a caller that gets EAGAIN can resubmit the rest.
 * Returns 0 on success or a negative DRM error code.
 */
static int radeon_cp_dispatch_texture(DRMFILE filp,
                                      drm_device_t * dev,
                                      drm_radeon_texture_t * tex,
                                      drm_radeon_tex_image_t * image)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_file_t *filp_priv;
        drm_buf_t *buf;
        u32 format;
        u32 *buffer;
        const u8 __user *data;
        int size, dwords, tex_width, blit_width;
        u32 height;
        int i;
        u32 texpitch, microtile;
        RING_LOCALS;

        DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

        /* The destination offset comes from user space; validate it. */
        if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
                DRM_ERROR("Invalid destination offset\n");
                return DRM_ERR(EINVAL);
        }

        dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

        /* Flush the pixel cache.  This ensures no pixel data gets mixed
         * up with the texture data from the host data blit, otherwise
         * part of the texture image may be corrupted.
         */
        BEGIN_RING(4);
        RADEON_FLUSH_CACHE();
        RADEON_WAIT_UNTIL_IDLE();
        ADVANCE_RING();

#ifdef __BIG_ENDIAN
        /* The Mesa texture functions provide the data in little endian as the
         * chip wants it, but we need to compensate for the fact that the CP
         * ring gets byte-swapped
         */
        BEGIN_RING(2);
        OUT_RING_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT);
        ADVANCE_RING();
#endif

        /* The compiler won't optimize away a division by a variable,
         * even if the only legal values are powers of two.  Thus, we'll
         * use a shift instead.
         */
        switch (tex->format) {
        case RADEON_TXFORMAT_ARGB8888:
        case RADEON_TXFORMAT_RGBA8888:
                /* 32 bits per pixel. */
                format = RADEON_COLOR_FORMAT_ARGB8888;
                tex_width = tex->width * 4;
                blit_width = image->width * 4;
                break;
        case RADEON_TXFORMAT_AI88:
        case RADEON_TXFORMAT_ARGB1555:
        case RADEON_TXFORMAT_RGB565:
        case RADEON_TXFORMAT_ARGB4444:
        case RADEON_TXFORMAT_VYUY422:
        case RADEON_TXFORMAT_YVYU422:
                /* 16 bits per pixel. */
                format = RADEON_COLOR_FORMAT_RGB565;
                tex_width = tex->width * 2;
                blit_width = image->width * 2;
                break;
        case RADEON_TXFORMAT_I8:
        case RADEON_TXFORMAT_RGB332:
                /* 8 bits per pixel. */
                format = RADEON_COLOR_FORMAT_CI8;
                tex_width = tex->width * 1;
                blit_width = image->width * 1;
                break;
        default:
                DRM_ERROR("invalid texture format %d\n", tex->format);
                return DRM_ERR(EINVAL);
        }
        texpitch = tex->pitch;
        /* The micro-tile flag rides in the pitch field at the position it
         * occupies in the DST_PITCH_OFFSET register (pitch << 22).
         */
        if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
                microtile = 1;
                if (tex_width < 64) {
                        texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
                        /* we got tiled coordinates, untile them */
                        image->x *= 2;
                }
        }
        else microtile = 0;

        DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

        do {
                DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
                          tex->offset >> 10, tex->pitch, tex->format,
                          image->x, image->y, image->width, image->height);

                /* Make a copy of some parameters in case we have to
                 * update them for a multi-pass texture blit.
                 */
                height = image->height;
                data = (const u8 __user *)image->data;

                size = height * blit_width;

                if (size > RADEON_MAX_TEXTURE_SIZE) {
                        /* Too big for one buffer: clamp this pass to as
                         * many whole scanlines as fit.
                         */
                        height = RADEON_MAX_TEXTURE_SIZE / blit_width;
                        size = height * blit_width;
                } else if (size < 4 && size > 0) {
                        size = 4;
                } else if (size == 0) {
                        return 0;
                }

                buf = radeon_freelist_get(dev);
                /* NOTE(review): this idle-and-retry path is compiled out
                 * by the "0 &&"; a failed allocation falls straight
                 * through to the EAGAIN return below.  Confirm whether
                 * the retry was disabled intentionally.
                 */
                if (0 && !buf) {
                        radeon_do_cp_idle(dev_priv);
                        buf = radeon_freelist_get(dev);
                }
                if (!buf) {
                        DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
                        /* Hand the updated image parameters back so the
                         * client can retry the remaining scanlines.
                         */
                        if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
                                return DRM_ERR(EFAULT);
                        return DRM_ERR(EAGAIN);
                }

                /* Dispatch the indirect buffer.
                 */
                buffer =
                    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
                dwords = size / 4;
                /* 8-dword HOSTDATA_BLT header followed by the pixel data. */
                buffer[0] = CP_PACKET3(RADEON_CNTL_HOSTDATA_BLT, dwords + 6);
                buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                             RADEON_GMC_BRUSH_NONE |
                             (format << 8) |
                             RADEON_GMC_SRC_DATATYPE_COLOR |
                             RADEON_ROP3_S |
                             RADEON_DP_SRC_SOURCE_HOST_DATA |
                             RADEON_GMC_CLR_CMP_CNTL_DIS |
                             RADEON_GMC_WR_MSK_DIS);

                buffer[2] = (texpitch << 22) | (tex->offset >> 10);
                buffer[3] = 0xffffffff;
                buffer[4] = 0xffffffff;
                buffer[5] = (image->y << 16) | image->x;
                buffer[6] = (height << 16) | image->width;
                buffer[7] = dwords;
                buffer += 8;

                /* NOTE(review): the EFAULT returns below exit with `buf`
                 * neither dispatched nor discarded, so it never returns
                 * to the freelist — confirm whether that is intended.
                 */
                if (microtile) {
                        /* texture micro tiling in use, minimum texture width is thus 16 bytes.
                           however, we cannot use blitter directly for texture width < 64 bytes,
                           since minimum tex pitch is 64 bytes and we need this to match
                           the texture width, otherwise the blitter will tile it wrong.
                           Thus, tiling manually in this case. Additionally, need to special
                           case tex height = 1, since our actual image will have height 2
                           and we need to ensure we don't read beyond the texture size
                           from user space. */
                        if (tex->height == 1) {
                                if (tex_width >= 64 || tex_width <= 16) {
                                        if (DRM_COPY_FROM_USER(buffer, data,
                                                       tex_width * sizeof(u32))) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                } else if (tex_width == 32) {
                                        /* Split the single scanline across the
                                         * two tile halves (16 bytes each).
                                         */
                                        if (DRM_COPY_FROM_USER(buffer, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        if (DRM_COPY_FROM_USER(buffer + 8, data + 16, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                }
                        } else if (tex_width >= 64 || tex_width == 16) {
                                /* Layout already matches: copy verbatim. */
                                if (DRM_COPY_FROM_USER(buffer, data,
                                                       dwords * sizeof(u32))) {
                                        DRM_ERROR("EFAULT on data, %d dwords\n",
                                                  dwords);
                                        return DRM_ERR(EFAULT);
                                }
                        } else if (tex_width < 16) {
                                /* One scanline per 16-byte tile row. */
                                for (i = 0; i < tex->height; i++) {
                                        if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        buffer += 4;
                                        data += tex_width;
                                }
                        } else if (tex_width == 32) {
                        /* TODO: make sure this works when not fitting in one buffer
                                (i.e. 32bytes x 2048...) */
                                /* Interleave two scanlines into the tile
                                 * pattern, 16 bytes at a time.
                                 */
                                for (i = 0; i < tex->height; i += 2) {
                                        if (DRM_COPY_FROM_USER(buffer, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        if (DRM_COPY_FROM_USER(buffer + 8, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        if (DRM_COPY_FROM_USER(buffer + 4, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        if (DRM_COPY_FROM_USER(buffer + 12, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        buffer += 16;
                                }
                        }
                }
                else {
                        if (tex_width >= 32) {
                                /* Texture image width is larger than the minimum, so we
                                 * can upload it directly.
                                 */
                                if (DRM_COPY_FROM_USER(buffer, data,
                                                       dwords * sizeof(u32))) {
                                        DRM_ERROR("EFAULT on data, %d dwords\n",
                                                  dwords);
                                        return DRM_ERR(EFAULT);
                                }
                        } else {
                                /* Texture image width is less than the minimum, so we
                                 * need to pad out each image scanline to the minimum
                                 * width.
                                 */
                                for (i = 0; i < tex->height; i++) {
                                        if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        buffer += 8;
                                        data += tex_width;
                                }
                        }
                }

                buf->filp = filp;
                /* Total packet size: 8-dword header plus the payload. */
                buf->used = (dwords + 8) * sizeof(u32);
                radeon_cp_dispatch_indirect(dev, buf, 0, buf->used);
                radeon_cp_discard_buffer(dev, buf);

                /* Update the input parameters for next time */
                image->y += height;
                image->height -= height;
                image->data = (const u8 __user *)image->data + size;
        } while (image->height > 0);

        /* Flush the pixel cache after the blit completes.  This ensures
         * the texture data is written out to memory before rendering
         * continues.
         */
        BEGIN_RING(4);
        RADEON_FLUSH_CACHE();
        RADEON_WAIT_UNTIL_2D_IDLE();
        ADVANCE_RING();
        return 0;
}
1749
/* Upload a 32x32 polygon stipple pattern (32 dwords) into the chip's
 * stipple table, starting at table address 0.
 */
static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        int i;
        RING_LOCALS;
        DRM_DEBUG("\n");

        BEGIN_RING(35);

        /* Reset the stipple table write address to the start. */
        OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
        OUT_RING(0x00000000);

        /* Stream all 32 pattern dwords into RE_STIPPLE_DATA. */
        OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
        for (i = 0; i < 32; i++) {
                OUT_RING(stipple[i]);
        }

        ADVANCE_RING();
}
1769
/* Program the hardware registers for one surface slot from the cached
 * state in dev_priv->surfaces.  The CP is idled first so in-flight
 * rendering is not affected by the register change.  Each surface has
 * three registers (info, lower bound, upper bound) spaced 16 bytes
 * apart per slot.
 */
static void radeon_apply_surface_regs(int surf_index, drm_radeon_private_t *dev_priv)
{
        /* No MMIO mapping yet (e.g. before init): nothing to program. */
        if (!dev_priv->mmio)
                return;

        radeon_do_cp_idle(dev_priv);

        RADEON_WRITE(RADEON_SURFACE0_INFO + 16*surf_index,
                dev_priv->surfaces[surf_index].flags);
        RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16*surf_index,
                dev_priv->surfaces[surf_index].lower);
        RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16*surf_index,
                dev_priv->surfaces[surf_index].upper);
}
1784
1785 /* Allocates a virtual surface
1786  * doesn't always allocate a real surface, will stretch an existing 
1787  * surface when possible.
1788  *
1789  * Note that refcount can be at most 2, since during a free refcount=3
1790  * might mean we have to allocate a new surface which might not always
1791  * be available.
1792  * For example : we allocate three contigous surfaces ABC. If B is 
1793  * freed, we suddenly need two surfaces to store A and C, which might
1794  * not always be available.
1795  */
1796 static int alloc_surface(drm_radeon_surface_alloc_t* new, drm_radeon_private_t *dev_priv, DRMFILE filp)
1797 {
1798         struct radeon_virt_surface *s;
1799         int i;
1800         int virt_surface_index;
1801         uint32_t new_upper, new_lower;
1802
1803         new_lower = new->address;
1804         new_upper = new_lower + new->size - 1;
1805
1806         /* sanity check */
1807         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1808                 ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK) ||
1809                 ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1810                 return -1;
1811
1812         /* make sure there is no overlap with existing surfaces */
1813         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1814                 if ((dev_priv->surfaces[i].refcount != 0) &&
1815                 (( (new_lower >= dev_priv->surfaces[i].lower) &&
1816                         (new_lower < dev_priv->surfaces[i].upper) ) ||
1817                  ( (new_lower < dev_priv->surfaces[i].lower) &&
1818                         (new_upper > dev_priv->surfaces[i].lower) )) ){
1819                 return -1;}
1820         }
1821
1822         /* find a virtual surface */
1823         for (i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1824                 if (dev_priv->virt_surfaces[i].filp == 0)
1825                         break;
1826         if (i == 2*RADEON_MAX_SURFACES) {
1827                 return -1;}
1828         virt_surface_index = i;
1829
1830         /* try to reuse an existing surface */
1831         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1832                 /* extend before */
1833                 if ((dev_priv->surfaces[i].refcount == 1) &&
1834                   (new->flags == dev_priv->surfaces[i].flags) &&
1835                   (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1836                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1837                         s->surface_index = i;
1838                         s->lower = new_lower;
1839                         s->upper = new_upper;
1840                         s->flags = new->flags;
1841                         s->filp = filp;
1842                         dev_priv->surfaces[i].refcount++;
1843                         dev_priv->surfaces[i].lower = s->lower;
1844                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1845                         return virt_surface_index;
1846                 }
1847
1848                 /* extend after */
1849                 if ((dev_priv->surfaces[i].refcount == 1) &&
1850                   (new->flags == dev_priv->surfaces[i].flags) &&
1851                   (new_lower == dev_priv->surfaces[i].upper + 1)) {
1852                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1853                         s->surface_index = i;
1854                         s->lower = new_lower;
1855                         s->upper = new_upper;
1856                         s->flags = new->flags;
1857                         s->filp = filp;
1858                         dev_priv->surfaces[i].refcount++;
1859                         dev_priv->surfaces[i].upper = s->upper;
1860                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1861                         return virt_surface_index;
1862                 }
1863         }
1864
1865         /* okay, we need a new one */
1866         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1867                 if (dev_priv->surfaces[i].refcount == 0) {
1868                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1869                         s->surface_index = i;
1870                         s->lower = new_lower;
1871                         s->upper = new_upper;
1872                         s->flags = new->flags;
1873                         s->filp = filp;
1874                         dev_priv->surfaces[i].refcount = 1;
1875                         dev_priv->surfaces[i].lower = s->lower;
1876                         dev_priv->surfaces[i].upper = s->upper;
1877                         dev_priv->surfaces[i].flags = s->flags;
1878                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1879                         return virt_surface_index;
1880                 }
1881         }
1882
1883         /* we didn't find anything */
1884         return -1;
1885 }
1886
1887 static int free_surface(DRMFILE filp, drm_radeon_private_t *dev_priv, int lower)
1888 {
1889         struct radeon_virt_surface *s;
1890         int i;
1891         /* find the virtual surface */
1892         for(i = 0; i < 2*RADEON_MAX_SURFACES; i++) {
1893                 s = &(dev_priv->virt_surfaces[i]);
1894                 if (s->filp) {
1895                         if ((lower == s->lower) && (filp == s->filp)) {
1896                                 if (dev_priv->surfaces[s->surface_index].lower == s->lower)
1897                                         dev_priv->surfaces[s->surface_index].lower = s->upper;
1898
1899                                 if (dev_priv->surfaces[s->surface_index].upper == s->upper)
1900                                         dev_priv->surfaces[s->surface_index].upper = s->lower;
1901
1902                                 dev_priv->surfaces[s->surface_index].refcount--;
1903                                 if (dev_priv->surfaces[s->surface_index].refcount == 0)
1904                                         dev_priv->surfaces[s->surface_index].flags = 0;
1905                                 s->filp = 0;
1906                                 radeon_apply_surface_regs(s->surface_index, dev_priv);
1907                                 return 0;
1908                         }
1909                 }
1910         }
1911         return 1;
1912 }
1913
1914 static void radeon_surfaces_release(DRMFILE filp, drm_radeon_private_t *dev_priv)
1915 {
1916         int i;
1917         for( i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1918         {
1919                 if (dev_priv->virt_surfaces[i].filp == filp)
1920                         free_surface(filp, dev_priv, dev_priv->virt_surfaces[i].lower);
1921         }
1922 }
1923
1924 /* ================================================================
1925  * IOCTL functions
1926  */
1927
1928 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1929 {
1930         DRM_DEVICE;
1931         drm_radeon_private_t *dev_priv = dev->dev_private;
1932         drm_radeon_surface_alloc_t alloc;
1933
1934         if (!dev_priv) {
1935                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1936                 return DRM_ERR(EINVAL);
1937         }
1938
1939         DRM_COPY_FROM_USER_IOCTL(alloc, (drm_radeon_surface_alloc_t __user *)data,
1940                                   sizeof(alloc));
1941
1942         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1943                 return DRM_ERR(EINVAL);
1944         else
1945                 return 0;
1946 }
1947
1948 static int radeon_surface_free(DRM_IOCTL_ARGS)
1949 {
1950         DRM_DEVICE;
1951         drm_radeon_private_t *dev_priv = dev->dev_private;
1952         drm_radeon_surface_free_t memfree;
1953
1954         if (!dev_priv) {
1955                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1956                 return DRM_ERR(EINVAL);
1957         }
1958
1959         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *)data,
1960                                   sizeof(memfree) );
1961
1962         if (free_surface(filp, dev_priv, memfree.address))
1963                 return DRM_ERR(EINVAL);
1964         else
1965                 return 0;
1966 }
1967
/* CLEAR ioctl: copy the clear parameters and the per-cliprect depth
 * boxes from user space, then emit the clear through
 * radeon_cp_dispatch_clear().
 */
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_radeon_clear_t clear;
        drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
        DRM_DEBUG("\n");

        LOCK_TEST_WITH_RETURN(dev, filp);

        DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
                                 sizeof(clear));

        RING_SPACE_TEST_WITH_RETURN(dev_priv);

        /* Clamp the shared-area cliprect count to the copy buffer size. */
        if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
                sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

        if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
                               sarea_priv->nbox * sizeof(depth_boxes[0])))
                return DRM_ERR(EFAULT);

        radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

        COMMIT_RING();
        return 0;
}
1996
/* Not sure why this isn't set all the time:
 */
/* Enable page flipping: sets RADEON_CRTC_OFFSET_FLIP_CNTL on both CRTC
 * offset control registers after waiting for 3D idle, then records that
 * flipping is active with page 0 current.
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        RING_LOCALS;

        DRM_DEBUG("\n");

        BEGIN_RING(6);
        RADEON_WAIT_UNTIL_3D_IDLE();
        /* Read-modify-write both CRTC offset controls to set the flip bit. */
        OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
        OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
                 RADEON_CRTC_OFFSET_FLIP_CNTL);
        OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
        OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
                 RADEON_CRTC_OFFSET_FLIP_CNTL);
        ADVANCE_RING();

        dev_priv->page_flipping = 1;
        dev_priv->current_page = 0;
        /* Publish the current page to clients via the shared area. */
        dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

        return 0;
}
2022
/* Called whenever a client dies, from drm_release.
 * NOTE:  Lock isn't necessarily held when this is called!
 */
/* Disable page flipping, first flipping back to page 0 if the other
 * page is currently displayed.
 */
static int radeon_do_cleanup_pageflip(drm_device_t * dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        DRM_DEBUG("\n");

        if (dev_priv->current_page != 0)
                radeon_cp_dispatch_flip(dev);

        dev_priv->page_flipping = 0;
        return 0;
}
2037
/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
/* FLIP ioctl: lazily enables page flipping on first use, then dispatches
 * a page flip.
 */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        DRM_DEBUG("\n");

        LOCK_TEST_WITH_RETURN(dev, filp);

        RING_SPACE_TEST_WITH_RETURN(dev_priv);

        /* First flip after init (or cleanup) turns flipping back on. */
        if (!dev_priv->page_flipping)
                radeon_do_init_pageflip(dev);

        radeon_cp_dispatch_flip(dev);

        COMMIT_RING();
        return 0;
}
2059
/* SWAP ioctl: blit the back buffer to the front buffer for each cliprect
 * in the shared area, then release the 3D context ownership.
 */
static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        DRM_DEBUG("\n");

        LOCK_TEST_WITH_RETURN(dev, filp);

        RING_SPACE_TEST_WITH_RETURN(dev_priv);

        /* Clamp the shared-area cliprect count to the supported maximum. */
        if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
                sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

        radeon_cp_dispatch_swap(dev);
        dev_priv->sarea_priv->ctx_owner = 0;

        COMMIT_RING();
        return 0;
}
2080
2081 static int radeon_cp_vertex(DRM_IOCTL_ARGS)
2082 {
2083         DRM_DEVICE;
2084         drm_radeon_private_t *dev_priv = dev->dev_private;
2085         drm_file_t *filp_priv;
2086         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2087         drm_device_dma_t *dma = dev->dma;
2088         drm_buf_t *buf;
2089         drm_radeon_vertex_t vertex;
2090         drm_radeon_tcl_prim_t prim;
2091
2092         LOCK_TEST_WITH_RETURN(dev, filp);
2093
2094         if (!dev_priv) {
2095                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2096                 return DRM_ERR(EINVAL);
2097         }
2098
2099         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2100
2101         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
2102                                  sizeof(vertex));
2103
2104         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2105                   DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);
2106
2107         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2108                 DRM_ERROR("buffer index %d (of %d max)\n",
2109                           vertex.idx, dma->buf_count - 1);
2110                 return DRM_ERR(EINVAL);
2111         }
2112         if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2113                 DRM_ERROR("buffer prim %d\n", vertex.prim);
2114                 return DRM_ERR(EINVAL);
2115         }
2116
2117         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2118         VB_AGE_TEST_WITH_RETURN(dev_priv);
2119
2120         buf = dma->buflist[vertex.idx];
2121
2122         if (buf->filp != filp) {
2123                 DRM_ERROR("process %d using buffer owned by %p\n",
2124                           DRM_CURRENTPID, buf->filp);
2125                 return DRM_ERR(EINVAL);
2126         }
2127         if (buf->pending) {
2128                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2129                 return DRM_ERR(EINVAL);
2130         }
2131
2132         /* Build up a prim_t record:
2133          */
2134         if (vertex.count) {
2135                 buf->used = vertex.count;       /* not used? */
2136
2137                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2138                         if (radeon_emit_state(dev_priv, filp_priv,
2139                                               &sarea_priv->context_state,
2140                                               sarea_priv->tex_state,
2141                                               sarea_priv->dirty)) {
2142                                 DRM_ERROR("radeon_emit_state failed\n");
2143                                 return DRM_ERR(EINVAL);
2144                         }
2145
2146                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2147                                                RADEON_UPLOAD_TEX1IMAGES |
2148                                                RADEON_UPLOAD_TEX2IMAGES |
2149                                                RADEON_REQUIRE_QUIESCENCE);
2150                 }
2151
2152                 prim.start = 0;
2153                 prim.finish = vertex.count;     /* unused */
2154                 prim.prim = vertex.prim;
2155                 prim.numverts = vertex.count;
2156                 prim.vc_format = dev_priv->sarea_priv->vc_format;
2157
2158                 radeon_cp_dispatch_vertex(dev, buf, &prim);
2159         }
2160
2161         if (vertex.discard) {
2162                 radeon_cp_discard_buffer(dev, buf);
2163         }
2164
2165         COMMIT_RING();
2166         return 0;
2167 }
2168
2169 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2170 {
2171         DRM_DEVICE;
2172         drm_radeon_private_t *dev_priv = dev->dev_private;
2173         drm_file_t *filp_priv;
2174         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2175         drm_device_dma_t *dma = dev->dma;
2176         drm_buf_t *buf;
2177         drm_radeon_indices_t elts;
2178         drm_radeon_tcl_prim_t prim;
2179         int count;
2180
2181         LOCK_TEST_WITH_RETURN(dev, filp);
2182
2183         if (!dev_priv) {
2184                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2185                 return DRM_ERR(EINVAL);
2186         }
2187
2188         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2189
2190         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2191                                  sizeof(elts));
2192
2193         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2194                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2195
2196         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2197                 DRM_ERROR("buffer index %d (of %d max)\n",
2198                           elts.idx, dma->buf_count - 1);
2199                 return DRM_ERR(EINVAL);
2200         }
2201         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2202                 DRM_ERROR("buffer prim %d\n", elts.prim);
2203                 return DRM_ERR(EINVAL);
2204         }
2205
2206         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2207         VB_AGE_TEST_WITH_RETURN(dev_priv);
2208
2209         buf = dma->buflist[elts.idx];
2210
2211         if (buf->filp != filp) {
2212                 DRM_ERROR("process %d using buffer owned by %p\n",
2213                           DRM_CURRENTPID, buf->filp);
2214                 return DRM_ERR(EINVAL);
2215         }
2216         if (buf->pending) {
2217                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2218                 return DRM_ERR(EINVAL);
2219         }
2220
2221         count = (elts.end - elts.start) / sizeof(u16);
2222         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2223
2224         if (elts.start & 0x7) {
2225                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2226                 return DRM_ERR(EINVAL);
2227         }
2228         if (elts.start < buf->used) {
2229                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2230                 return DRM_ERR(EINVAL);
2231         }
2232
2233         buf->used = elts.end;
2234
2235         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2236                 if (radeon_emit_state(dev_priv, filp_priv,
2237                                       &sarea_priv->context_state,
2238                                       sarea_priv->tex_state,
2239                                       sarea_priv->dirty)) {
2240                         DRM_ERROR("radeon_emit_state failed\n");
2241                         return DRM_ERR(EINVAL);
2242                 }
2243
2244                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2245                                        RADEON_UPLOAD_TEX1IMAGES |
2246                                        RADEON_UPLOAD_TEX2IMAGES |
2247                                        RADEON_REQUIRE_QUIESCENCE);
2248         }
2249
2250         /* Build up a prim_t record:
2251          */
2252         prim.start = elts.start;
2253         prim.finish = elts.end;
2254         prim.prim = elts.prim;
2255         prim.offset = 0;        /* offset from start of dma buffers */
2256         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2257         prim.vc_format = dev_priv->sarea_priv->vc_format;
2258
2259         radeon_cp_dispatch_indices(dev, buf, &prim);
2260         if (elts.discard) {
2261                 radeon_cp_discard_buffer(dev, buf);
2262         }
2263
2264         COMMIT_RING();
2265         return 0;
2266 }
2267
2268 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2269 {
2270         DRM_DEVICE;
2271         drm_radeon_private_t *dev_priv = dev->dev_private;
2272         drm_radeon_texture_t tex;
2273         drm_radeon_tex_image_t image;
2274         int ret;
2275
2276         LOCK_TEST_WITH_RETURN(dev, filp);
2277
2278         DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2279                                  sizeof(tex));
2280
2281         if (tex.image == NULL) {
2282                 DRM_ERROR("null texture image!\n");
2283                 return DRM_ERR(EINVAL);
2284         }
2285
2286         if (DRM_COPY_FROM_USER(&image,
2287                                (drm_radeon_tex_image_t __user *) tex.image,
2288                                sizeof(image)))
2289                 return DRM_ERR(EFAULT);
2290
2291         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2292         VB_AGE_TEST_WITH_RETURN(dev_priv);
2293
2294         ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
2295
2296         COMMIT_RING();
2297         return ret;
2298 }
2299
/* RADEON_STIPPLE ioctl: copy a 32x32 polygon stipple pattern from user
 * space and program it into the stipple registers via the CP ring.
 */
static int radeon_cp_stipple(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];	/* 32 rows of 32 bits: the complete pattern */

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
				 sizeof(stipple));

	/* The pattern itself lives behind a nested user pointer. */
	if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}
2322
/* RADEON_INDIRECT ioctl: execute a raw command buffer from a client DMA
 * buffer.  The contents are NOT verified, so (per the comment below)
 * this must only be reachable by privileged clients such as the X
 * server.
 */
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	/* The buffer must belong to this client and not be in flight. */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* NOTE(review): indirect.end is not validated against the
	 * buffer's total size -- presumably tolerable only because this
	 * ioctl is limited to privileged clients; confirm. */
	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2396
2397 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
2398 {
2399         DRM_DEVICE;
2400         drm_radeon_private_t *dev_priv = dev->dev_private;
2401         drm_file_t *filp_priv;
2402         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2403         drm_device_dma_t *dma = dev->dma;
2404         drm_buf_t *buf;
2405         drm_radeon_vertex2_t vertex;
2406         int i;
2407         unsigned char laststate;
2408
2409         LOCK_TEST_WITH_RETURN(dev, filp);
2410
2411         if (!dev_priv) {
2412                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2413                 return DRM_ERR(EINVAL);
2414         }
2415
2416         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2417
2418         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
2419                                  sizeof(vertex));
2420
2421         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2422                   DRM_CURRENTPID, vertex.idx, vertex.discard);
2423
2424         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2425                 DRM_ERROR("buffer index %d (of %d max)\n",
2426                           vertex.idx, dma->buf_count - 1);
2427                 return DRM_ERR(EINVAL);
2428         }
2429
2430         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2431         VB_AGE_TEST_WITH_RETURN(dev_priv);
2432
2433         buf = dma->buflist[vertex.idx];
2434
2435         if (buf->filp != filp) {
2436                 DRM_ERROR("process %d using buffer owned by %p\n",
2437                           DRM_CURRENTPID, buf->filp);
2438                 return DRM_ERR(EINVAL);
2439         }
2440
2441         if (buf->pending) {
2442                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2443                 return DRM_ERR(EINVAL);
2444         }
2445
2446         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2447                 return DRM_ERR(EINVAL);
2448
2449         for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
2450                 drm_radeon_prim_t prim;
2451                 drm_radeon_tcl_prim_t tclprim;
2452
2453                 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
2454                         return DRM_ERR(EFAULT);
2455
2456                 if (prim.stateidx != laststate) {
2457                         drm_radeon_state_t state;
2458
2459                         if (DRM_COPY_FROM_USER(&state,
2460                                                &vertex.state[prim.stateidx],
2461                                                sizeof(state)))
2462                                 return DRM_ERR(EFAULT);
2463
2464                         if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
2465                                 DRM_ERROR("radeon_emit_state2 failed\n");
2466                                 return DRM_ERR(EINVAL);
2467                         }
2468
2469                         laststate = prim.stateidx;
2470                 }
2471
2472                 tclprim.start = prim.start;
2473                 tclprim.finish = prim.finish;
2474                 tclprim.prim = prim.prim;
2475                 tclprim.vc_format = prim.vc_format;
2476
2477                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2478                         tclprim.offset = prim.numverts * 64;
2479                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2480
2481                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2482                 } else {
2483                         tclprim.numverts = prim.numverts;
2484                         tclprim.offset = 0;     /* not used */
2485
2486                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2487                 }
2488
2489                 if (sarea_priv->nbox == 1)
2490                         sarea_priv->nbox = 0;
2491         }
2492
2493         if (vertex.discard) {
2494                 radeon_cp_discard_buffer(dev, buf);
2495         }
2496
2497         COMMIT_RING();
2498         return 0;
2499 }
2500
/* Emit one register-write state packet from a client command buffer.
 *
 * header.packet.packet_id indexes the global packet[] table, which
 * supplies the destination register and dword count for each known
 * state packet.  The payload is verified/fixed up before being copied
 * to the ring; on success the cmdbuf cursor is advanced past the
 * consumed dwords.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_cmd_buffer_t * cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	/* Never read past the end of what the client actually supplied. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Sanitize register offsets etc. before touching the ring. */
	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2536
2537 static __inline__ int radeon_emit_scalars(drm_radeon_private_t * dev_priv,
2538                                           drm_radeon_cmd_header_t header,
2539                                           drm_radeon_cmd_buffer_t * cmdbuf)
2540 {
2541         int sz = header.scalars.count;
2542         int start = header.scalars.offset;
2543         int stride = header.scalars.stride;
2544         RING_LOCALS;
2545
2546         BEGIN_RING(3 + sz);
2547         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2548         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2549         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2550         OUT_RING_TABLE(cmdbuf->buf, sz);
2551         ADVANCE_RING();
2552         cmdbuf->buf += sz * sizeof(int);
2553         cmdbuf->bufsz -= sz * sizeof(int);
2554         return 0;
2555 }
2556
/* Variant of radeon_emit_scalars that biases the scalar offset by
 * 0x100, reaching the upper bank of scalar registers (presumably the
 * header's offset field is too narrow to encode these indices
 * directly -- TODO confirm against drm_radeon_cmd_header_t's layout).
 */
static __inline__ int radeon_emit_scalars2(drm_radeon_private_t * dev_priv,
					   drm_radeon_cmd_header_t header,
					   drm_radeon_cmd_buffer_t * cmdbuf)
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2578
2579 static __inline__ int radeon_emit_vectors(drm_radeon_private_t * dev_priv,
2580                                           drm_radeon_cmd_header_t header,
2581                                           drm_radeon_cmd_buffer_t * cmdbuf)
2582 {
2583         int sz = header.vectors.count;
2584         int start = header.vectors.offset;
2585         int stride = header.vectors.stride;
2586         RING_LOCALS;
2587
2588         BEGIN_RING(3 + sz);
2589         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2590         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2591         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2592         OUT_RING_TABLE(cmdbuf->buf, sz);
2593         ADVANCE_RING();
2594
2595         cmdbuf->buf += sz * sizeof(int);
2596         cmdbuf->bufsz -= sz * sizeof(int);
2597         return 0;
2598 }
2599
/* Verify and emit one raw PACKET3 command from the client buffer.
 *
 * radeon_check_and_fixup_packet3() computes the packet length (cmdsz,
 * in dwords) and sanitizes any embedded offsets; on success the packet
 * is copied verbatim onto the ring and the cmdbuf cursor advanced.
 */
static int radeon_emit_packet3(drm_device_t * dev,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_buffer_t * cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2625
/* Verify a PACKET3 command, then replay it once per cliprect, setting
 * the hardware scissor to each box first.  With no boxes at all the
 * packet is skipped entirely (only the cursor advances); with boxes,
 * the do/while still emits at least once.
 */
static int radeon_emit_packet3_cliprect(drm_device_t * dev,
					drm_file_t * filp_priv,
					drm_radeon_cmd_buffer_t * cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		/* Replay the verified packet for this cliprect. */
		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	/* Consume the packet from the command stream exactly once. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2687
2688 static int radeon_emit_wait(drm_device_t * dev, int flags)
2689 {
2690         drm_radeon_private_t *dev_priv = dev->dev_private;
2691         RING_LOCALS;
2692
2693         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2694         switch (flags) {
2695         case RADEON_WAIT_2D:
2696                 BEGIN_RING(2);
2697                 RADEON_WAIT_UNTIL_2D_IDLE();
2698                 ADVANCE_RING();
2699                 break;
2700         case RADEON_WAIT_3D:
2701                 BEGIN_RING(2);
2702                 RADEON_WAIT_UNTIL_3D_IDLE();
2703                 ADVANCE_RING();
2704                 break;
2705         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2706                 BEGIN_RING(2);
2707                 RADEON_WAIT_UNTIL_IDLE();
2708                 ADVANCE_RING();
2709                 break;
2710         default:
2711                 return DRM_ERR(EINVAL);
2712         }
2713
2714         return 0;
2715 }
2716
2717 static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
2718 {
2719         DRM_DEVICE;
2720         drm_radeon_private_t *dev_priv = dev->dev_private;
2721         drm_file_t *filp_priv;
2722         drm_device_dma_t *dma = dev->dma;
2723         drm_buf_t *buf = NULL;
2724         int idx;
2725         drm_radeon_cmd_buffer_t cmdbuf;
2726         drm_radeon_cmd_header_t header;
2727         int orig_nbox, orig_bufsz;
2728         char *kbuf;
2729
2730         LOCK_TEST_WITH_RETURN(dev, filp);
2731
2732         if (!dev_priv) {
2733                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2734                 return DRM_ERR(EINVAL);
2735         }
2736
2737         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2738
2739         DRM_COPY_FROM_USER_IOCTL(cmdbuf,
2740                                  (drm_radeon_cmd_buffer_t __user *) data,
2741                                  sizeof(cmdbuf));
2742
2743         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2744         VB_AGE_TEST_WITH_RETURN(dev_priv);
2745
2746         if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
2747                 return DRM_ERR(EINVAL);
2748         }
2749
2750         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2751          * races between checking values and using those values in other code,
2752          * and simply to avoid a lot of function calls to copy in data.
2753          */
2754         orig_bufsz = cmdbuf.bufsz;
2755         if (orig_bufsz != 0) {
2756                 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2757                 if (kbuf == NULL)
2758                         return DRM_ERR(ENOMEM);
2759                 if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz))
2760                         return DRM_ERR(EFAULT);
2761                 cmdbuf.buf = kbuf;
2762         }
2763
2764         orig_nbox = cmdbuf.nbox;
2765
2766         while (cmdbuf.bufsz >= sizeof(header)) {
2767                 header.i = *(int *)cmdbuf.buf;
2768                 cmdbuf.buf += sizeof(header);
2769                 cmdbuf.bufsz -= sizeof(header);
2770
2771                 switch (header.header.cmd_type) {
2772                 case RADEON_CMD_PACKET:
2773                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2774                         if (radeon_emit_packets
2775                             (dev_priv, filp_priv, header, &cmdbuf)) {
2776                                 DRM_ERROR("radeon_emit_packets failed\n");
2777                                 goto err;
2778                         }
2779                         break;
2780
2781                 case RADEON_CMD_SCALARS:
2782                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2783                         if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
2784                                 DRM_ERROR("radeon_emit_scalars failed\n");
2785                                 goto err;
2786                         }
2787                         break;
2788
2789                 case RADEON_CMD_VECTORS:
2790                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2791                         if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
2792                                 DRM_ERROR("radeon_emit_vectors failed\n");
2793                                 goto err;
2794                         }
2795                         break;
2796
2797                 case RADEON_CMD_DMA_DISCARD:
2798                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2799                         idx = header.dma.buf_idx;
2800                         if (idx < 0 || idx >= dma->buf_count) {
2801                                 DRM_ERROR("buffer index %d (of %d max)\n",
2802                                           idx, dma->buf_count - 1);
2803                                 goto err;
2804                         }
2805
2806                         buf = dma->buflist[idx];
2807                         if (buf->filp != filp || buf->pending) {
2808                                 DRM_ERROR("bad buffer %p %p %d\n",
2809                                           buf->filp, filp, buf->pending);
2810                                 goto err;
2811                         }
2812
2813                         radeon_cp_discard_buffer(dev, buf);
2814                         break;
2815
2816                 case RADEON_CMD_PACKET3:
2817                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2818                         if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
2819                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2820                                 goto err;
2821                         }
2822                         break;
2823
2824                 case RADEON_CMD_PACKET3_CLIP:
2825                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2826                         if (radeon_emit_packet3_cliprect
2827                             (dev, filp_priv, &cmdbuf, orig_nbox)) {
2828                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2829                                 goto err;
2830                         }
2831                         break;
2832
2833                 case RADEON_CMD_SCALARS2:
2834                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2835                         if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
2836                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2837                                 goto err;
2838                         }
2839                         break;
2840
2841                 case RADEON_CMD_WAIT:
2842                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2843                         if (radeon_emit_wait(dev, header.wait.flags)) {
2844                                 DRM_ERROR("radeon_emit_wait failed\n");
2845                                 goto err;
2846                         }
2847                         break;
2848                 default:
2849                         DRM_ERROR("bad cmd_type %d at %p\n",
2850                                   header.header.cmd_type,
2851                                   cmdbuf.buf - sizeof(header));
2852                         goto err;
2853                 }
2854         }
2855
2856         if (orig_bufsz != 0)
2857                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2858         DRM_DEBUG("DONE\n");
2859         COMMIT_RING();
2860         return 0;
2861
2862 err:
2863         if (orig_bufsz != 0)
2864                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2865         return DRM_ERR(EINVAL);
2866 }
2867
/* RADEON_GETPARAM ioctl: return one int-sized driver/hardware value to
 * user space, selected by param.param, written through param.value.
 */
static int radeon_cp_getparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t param;
	int value;

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
				 sizeof(param));

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param.param) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		/* GET_SCRATCH reads the CP scratch registers; the read
		 * counters feed the driver's usage statistics. */
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH(0);
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH(1);
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH(2);
		break;
	case RADEON_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio_offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
		/*
		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
		 * pointer which can't fit into an int-sized variable.  According to
		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
		 * not supporting it shouldn't be a problem.  If the same functionality
		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
		 * so backwards-compatibility for the embedded platforms can be
		 * maintained.  --davidm 4-Feb-2004.
		 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		value = (long)dev->lock.hw_lock;
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	default:
		/* Unknown parameter id. */
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return DRM_ERR(EFAULT);
	}

	return 0;
}
2941
2942 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
2943 {
2944         DRM_DEVICE;
2945         drm_radeon_private_t *dev_priv = dev->dev_private;
2946         drm_file_t *filp_priv;
2947         drm_radeon_setparam_t sp;
2948         struct drm_radeon_driver_file_fields *radeon_priv;
2949
2950         if (!dev_priv) {
2951                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2952                 return DRM_ERR(EINVAL);
2953         }
2954
2955         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2956
2957         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
2958                                  sizeof(sp));
2959
2960         switch (sp.param) {
2961         case RADEON_SETPARAM_FB_LOCATION:
2962                 radeon_priv = filp_priv->driver_priv;
2963                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
2964                 break;
2965         case RADEON_SETPARAM_SWITCH_TILING:
2966                 if (sp.value == 0) {
2967                         DRM_DEBUG( "color tiling disabled\n" );
2968                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
2969                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
2970                         dev_priv->sarea_priv->tiling_enabled = 0;
2971                 }
2972                 else if (sp.value == 1) {
2973                         DRM_DEBUG( "color tiling enabled\n" );
2974                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
2975                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
2976                         dev_priv->sarea_priv->tiling_enabled = 1;
2977                 }
2978                 break;
2979         default:
2980                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
2981                 return DRM_ERR(EINVAL);
2982         }
2983
2984         return 0;
2985 }
2986
/* When a client dies:
 *    - Check for and clean up flipped page state
 *    - Free any allocated GART memory.
 *    - Free any allocated radeon surfaces.
 *
 * The DRM infrastructure takes care of reclaiming dma buffers.
 */
2994 void radeon_driver_prerelease(drm_device_t * dev, DRMFILE filp)
2995 {
2996         if (dev->dev_private) {
2997                 drm_radeon_private_t *dev_priv = dev->dev_private;
2998                 if (dev_priv->page_flipping) {
2999                         radeon_do_cleanup_pageflip(dev);
3000                 }
3001                 radeon_mem_release(filp, dev_priv->gart_heap);
3002                 radeon_mem_release(filp, dev_priv->fb_heap);
3003                 radeon_surfaces_release(filp, dev_priv);
3004         }
3005 }
3006
/* Device teardown hook: release all CP/engine resources via
 * radeon_do_release() before the DRM core finishes taking the device
 * down. */
void radeon_driver_pretakedown(drm_device_t * dev)
{
	radeon_do_release(dev);
}
3011
3012 int radeon_driver_open_helper(drm_device_t * dev, drm_file_t * filp_priv)
3013 {
3014         drm_radeon_private_t *dev_priv = dev->dev_private;
3015         struct drm_radeon_driver_file_fields *radeon_priv;
3016
3017         DRM_DEBUG("\n");
3018         radeon_priv =
3019             (struct drm_radeon_driver_file_fields *)
3020             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3021
3022         if (!radeon_priv)
3023                 return -ENOMEM;
3024
3025         filp_priv->driver_priv = radeon_priv;
3026
3027         if (dev_priv)
3028                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3029         else
3030                 radeon_priv->radeon_fb_delta = 0;
3031         return 0;
3032 }
3033
3034 void radeon_driver_free_filp_priv(drm_device_t * dev, drm_file_t * filp_priv)
3035 {
3036         struct drm_radeon_driver_file_fields *radeon_priv =
3037             filp_priv->driver_priv;
3038
3039         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3040 }
3041
/* Radeon ioctl dispatch table, indexed by ioctl number via
 * DRM_IOCTL_NR().  Each entry is {handler, flag, flag}; the two integer
 * fields appear to be the DRM core's per-ioctl permission flags
 * (auth-required and root/master-only) -- NOTE(review): confirm against
 * the drm_ioctl_desc_t definition in the DRM core headers. */
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, 1, 0}
};

/* Number of entries in radeon_ioctls[]; consumed by the DRM core. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);