2 * Copyright 2008 Jerome Glisse.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Jerome Glisse <glisse@freedesktop.org>
28 #include "radeon_drm.h"
29 #include "radeon_drv.h"
/*
 * radeon_cs2_ioctl - chunk-based command-stream submission ioctl.
 *
 * Copies the userspace array of chunk pointers, copies in each chunk
 * header, pulls reloc/IB chunk data into kernel memory, then drives the
 * per-chip cs callbacks: ib_get -> parse -> id_emit -> ib_free.
 *
 * NOTE(review): this listing is an extraction with original source lines
 * missing (several error paths, returns and braces are not visible here).
 */
32 int radeon_cs2_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)
34 struct drm_radeon_cs_parser parser;
35 struct drm_radeon_private *dev_priv = dev->dev_private;
36 struct drm_radeon_cs2 *cs = data;
38 struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
39 uint64_t *chunk_array;
40 uint64_t *chunk_array_ptr;
44 /* set command stream id to 0 which is fake id */
/* reject submission before the driver is initialized */
48 if (dev_priv == NULL) {
49 DRM_ERROR("called with no initialization\n");
52 if (!cs->num_chunks) {
/* kernel-side copy of the user chunk-pointer array */
57 chunk_array = drm_calloc(cs->num_chunks, sizeof(uint64_t), DRM_MEM_DRIVER);
62 chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
64 if (DRM_COPY_FROM_USER(chunk_array, chunk_array_ptr, sizeof(uint64_t)*cs->num_chunks)) {
70 parser.file_priv = fpriv;
/* -1 means "no relocation chunk seen yet" */
71 parser.reloc_index = -1;
73 parser.num_chunks = cs->num_chunks;
74 /* copy out the chunk headers */
75 parser.chunks = drm_calloc(parser.num_chunks, sizeof(struct drm_radeon_kernel_chunk), DRM_MEM_DRIVER);
80 for (i = 0; i < parser.num_chunks; i++) {
81 struct drm_radeon_cs_chunk user_chunk;
83 chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
85 if (DRM_COPY_FROM_USER(&user_chunk, chunk_ptr, sizeof(struct drm_radeon_cs_chunk))){
89 parser.chunks[i].chunk_id = user_chunk.chunk_id;
/* remember where the reloc and IB chunks live for later lookup */
91 if (parser.chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS)
92 parser.reloc_index = i;
94 if (parser.chunks[i].chunk_id == RADEON_CHUNK_ID_IB)
/* OLD-style chunks carry inline relocs, so no separate reloc chunk */
97 if (parser.chunks[i].chunk_id == RADEON_CHUNK_ID_OLD) {
99 parser.reloc_index = -1;
102 parser.chunks[i].length_dw = user_chunk.length_dw;
103 parser.chunks[i].chunk_data = (uint32_t *)(unsigned long)user_chunk.chunk_data;
105 parser.chunks[i].kdata = NULL;
106 size = parser.chunks[i].length_dw * sizeof(uint32_t);
/* IB/OLD/RELOCS chunk payloads are copied into kernel memory */
108 switch(parser.chunks[i].chunk_id) {
109 case RADEON_CHUNK_ID_IB:
110 case RADEON_CHUNK_ID_OLD:
115 case RADEON_CHUNK_ID_RELOCS:
117 parser.chunks[i].kdata = drm_alloc(size, DRM_MEM_DRIVER);
118 if (!parser.chunks[i].kdata) {
123 if (DRM_COPY_FROM_USER(parser.chunks[i].kdata, parser.chunks[i].chunk_data, size)) {
128 parser.chunks[i].kdata = NULL;
133 DRM_DEBUG("chunk %d %d %d %p\n", i, parser.chunks[i].chunk_id, parser.chunks[i].length_dw,
134 parser.chunks[i].chunk_data);
/* limit the IB to 16K dwords (64KB), same cap as the old ioctl */
138 if (parser.chunks[parser.ib_index].length_dw > (16 * 1024)) {
139 DRM_ERROR("cs->dwords too big: %d\n", parser.chunks[parser.ib_index].length_dw);
145 r = dev_priv->cs.ib_get(&parser);
147 DRM_ERROR("ib_get failed\n");
151 /* now parse command stream */
152 r = dev_priv->cs.parse(&parser);
157 /* emit cs id sequence */
158 dev_priv->cs.id_emit(&parser, &cs_id);
/* cleanup: release the IB and all kernel-side chunk copies */
163 dev_priv->cs.ib_free(&parser);
165 for (i = 0; i < parser.num_chunks; i++) {
166 if (parser.chunks[i].kdata)
167 drm_free(parser.chunks[i].kdata, parser.chunks[i].length_dw * sizeof(uint32_t), DRM_MEM_DRIVER);
170 drm_free(parser.chunks, sizeof(struct drm_radeon_kernel_chunk)*parser.num_chunks, DRM_MEM_DRIVER);
171 drm_free(chunk_array, sizeof(uint64_t)*parser.num_chunks, DRM_MEM_DRIVER);
/*
 * radeon_cs_ioctl - legacy single-buffer command-stream submission ioctl.
 *
 * Wraps the user packet buffer in one fake RADEON_CHUNK_ID_OLD chunk so
 * it can go through the same parser path as the chunked cs2 ioctl.
 *
 * NOTE(review): gappy extraction — error returns and braces are missing
 * from this view.
 */
176 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)
178 struct drm_radeon_cs_parser parser;
179 struct drm_radeon_private *dev_priv = dev->dev_private;
180 struct drm_radeon_cs *cs = data;
181 uint32_t *packets = NULL;
183 uint32_t card_offset;
186 struct drm_radeon_kernel_chunk chunk_fake[1];
188 /* set command stream id to 0 which is fake id */
192 if (dev_priv == NULL) {
193 DRM_ERROR("called with no initialization\n");
199 /* limit cs to 64K ib */
200 if (cs->dwords > (16 * 1024)) {
203 /* copy cs from userspace maybe we should copy into ib to save
204 * one copy but ib will be mapped wc so not good for cmd checking
205 * somethings worth testing i guess (Jerome)
207 size = cs->dwords * sizeof(uint32_t);
208 packets = drm_alloc(size, DRM_MEM_DRIVER);
209 if (packets == NULL) {
212 if (DRM_COPY_FROM_USER(packets, (void __user *)(unsigned long)cs->packets, size)) {
/* build a single fake OLD-style chunk around the copied packets */
217 chunk_fake[0].chunk_id = RADEON_CHUNK_ID_OLD;
218 chunk_fake[0].length_dw = cs->dwords;
219 chunk_fake[0].kdata = packets;
222 parser.file_priv = fpriv;
223 parser.num_chunks = 1;
224 parser.chunks = chunk_fake;
/* OLD-style stream: relocs are inline, no separate reloc chunk */
226 parser.reloc_index = -1;
229 r = dev_priv->cs.ib_get(&parser);
234 /* now parse command stream */
235 r = dev_priv->cs.parse(&parser);
240 /* emit cs id sequence */
241 dev_priv->cs.id_emit(&parser, &cs_id);
246 dev_priv->cs.ib_free(&parser);
247 drm_free(packets, size, DRM_MEM_DRIVER);
/*
 * radeon_nomm_relocate - relocation callback used when no memory manager
 * is active.  Body not visible in this listing.
 */
252 static int radeon_nomm_relocate(struct drm_radeon_cs_parser *parser, uint32_t *reloc, uint32_t *offset)
/* new-style reloc NOP carries 0 extra dwords; mask selects the 22-bit
 * 2D surface offset field (offset is stored in 1KB units, hence <<10 below) */
258 #define RELOC_SIZE_NEW 0
259 #define RADEON_2D_OFFSET_MASK 0x3fffff
/*
 * radeon_cs_relocate_packet0 - patch the offset value of a register
 * PACKET0 write using the relocation information that follows it as a
 * CP NOP packet.
 *
 * The packet layout is: [PACKET0 hdr][value][PACKET3 NOP reloc...].
 * The NOP header is validated (old vs new reloc size), the relocation
 * callback resolves the GPU offset, and the value dword is rewritten.
 *
 * FIX(review): the second cs.relocate call used `parser.` (member access
 * on a pointer, on the wrong token) instead of `parser,` as the first
 * argument — compare the identical call for the 2D pitch/offset case.
 *
 * NOTE(review): gappy extraction — switch header, returns and braces are
 * missing from this view.
 */
261 static __inline__ int radeon_cs_relocate_packet0(struct drm_radeon_cs_parser *parser, uint32_t offset_dw)
263 struct drm_device *dev = parser->dev;
264 drm_radeon_private_t *dev_priv = dev->dev_private;
265 uint32_t hdr, reg, val, packet3_hdr;
266 uint32_t tmp, offset;
267 struct drm_radeon_kernel_chunk *ib_chunk;
270 ib_chunk = &parser->chunks[parser->ib_index];
271 // if (parser->reloc_index == -1)
274 hdr = ib_chunk->kdata[offset_dw];
275 reg = (hdr & R300_CP_PACKET0_REG_MASK) << 2;
276 val = ib_chunk->kdata[offset_dw + 1];
277 packet3_hdr = ib_chunk->kdata[offset_dw + 2];
279 /* this is too strict we may want to expand the length in the future and have
280 old kernels ignore it. */
281 if (parser->reloc_index == -1) {
282 if (packet3_hdr != (RADEON_CP_PACKET3 | RADEON_CP_NOP | (RELOC_SIZE << 16))) {
283 DRM_ERROR("Packet 3 was %x should have been %x: reg is %x\n", packet3_hdr, RADEON_CP_PACKET3 | RADEON_CP_NOP | (RELOC_SIZE << 16), reg);
287 if (packet3_hdr != (RADEON_CP_PACKET3 | RADEON_CP_NOP | (RELOC_SIZE_NEW << 16))) {
288 DRM_ERROR("Packet 3 was %x should have been %x: reg is %x\n", packet3_hdr, RADEON_CP_PACKET3 | RADEON_CP_NOP | (RELOC_SIZE_NEW << 16), reg);
295 case RADEON_DST_PITCH_OFFSET:
296 case RADEON_SRC_PITCH_OFFSET:
297 /* pass in the start of the reloc */
298 ret = dev_priv->cs.relocate(parser, ib_chunk->kdata + offset_dw + 2, &offset);
301 tmp = (val & RADEON_2D_OFFSET_MASK) << 10;
302 val &= ~RADEON_2D_OFFSET_MASK;
307 case RADEON_RB3D_COLOROFFSET:
308 case R300_RB3D_COLOROFFSET0:
309 case R300_ZB_DEPTHOFFSET:
310 case R300_TX_OFFSET_0:
311 case R300_TX_OFFSET_0+4:
312 case R200_PP_TXOFFSET_0:
313 case R200_PP_TXOFFSET_1:
314 case RADEON_PP_TXOFFSET_0:
315 case RADEON_PP_TXOFFSET_1:
316 ret = dev_priv->cs.relocate(parser, ib_chunk->kdata + offset_dw + 2, &offset);
320 offset &= 0xffffffe0;
327 ib_chunk->kdata[offset_dw + 1] = val;
/*
 * radeon_cs_relocate_packet3 - patch the offset embedded in a PACKET3
 * command (HOSTDATA_BLT case visible here) using the relocation data
 * appended after the packet body.
 *
 * NOTE(review): gappy extraction — switch header, returns and braces are
 * missing from this view.
 */
331 static int radeon_cs_relocate_packet3(struct drm_radeon_cs_parser *parser,
334 drm_radeon_private_t *dev_priv = parser->dev->dev_private;
335 uint32_t hdr, num_dw, reg;
336 uint32_t offset, val, tmp;
338 struct drm_radeon_kernel_chunk *ib_chunk;
340 ib_chunk = &parser->chunks[parser->ib_index];
341 // if (parser->reloc_index == -1)
344 hdr = ib_chunk->kdata[offset_dw];
/* payload dword count lives in bits 29:16 of the packet header */
345 num_dw = (hdr & RADEON_CP_PACKET_COUNT_MASK) >> 16;
349 case RADEON_CNTL_HOSTDATA_BLT:
351 val = ib_chunk->kdata[offset_dw + 2];
/* reloc NOP follows the whole packet: header + num_dw payload + 2 */
352 ret = dev_priv->cs.relocate(parser, ib_chunk->kdata + offset_dw + num_dw + 2, &offset);
/* offset field is in 1KB units within the dword at +2 */
356 tmp = (val & RADEON_2D_OFFSET_MASK) << 10;
357 val &= ~RADEON_2D_OFFSET_MASK;
362 ib_chunk->kdata[offset_dw + 2] = val;
/*
 * radeon_cs_packet0 - validate one PACKET0 register-write packet inside
 * the IB chunk: range-check each target register (r300 tables) and kick
 * off relocation for registers flagged as carrying GPU offsets.
 *
 * NOTE(review): gappy extraction — loop increments, returns and braces
 * are missing from this view.
 */
370 int radeon_cs_packet0(struct drm_radeon_cs_parser *parser, uint32_t offset_dw)
372 uint32_t hdr, num_dw, reg;
376 hdr = parser->chunks[parser->ib_index].kdata[offset_dw];
/* total size = payload count + 2 header/first dwords */
377 num_dw = ((hdr & RADEON_CP_PACKET_COUNT_MASK) >> 16) + 2;
378 reg = (hdr & R300_CP_PACKET0_REG_MASK) << 2;
380 while (count_dw < num_dw) {
381 /* need to have something like the r300 validation here -
382 list of allowed registers */
385 ret = r300_check_range(reg, 1);
388 DRM_ERROR("Illegal register %x\n", reg);
393 flags = r300_get_reg_flags(reg);
/* registers marked CHECK_OFFSET must be followed by a reloc NOP */
394 if (flags == MARK_CHECK_OFFSET) {
396 DRM_ERROR("Cannot relocate inside type stream of reg0 packets\n");
400 ret = radeon_cs_relocate_packet0(parser, offset_dw);
403 DRM_DEBUG("need to relocate %x %d\n", reg, flags);
404 /* okay it should be followed by a NOP */
405 } else if (flags == MARK_CHECK_SCISSOR) {
406 DRM_DEBUG("need to validate scissor %x %d\n", reg, flags);
408 DRM_DEBUG("illegal register %x %d\n", reg, flags);
/*
 * radeon_cs_parse - walk the IB chunk dword by dword, dispatch each CP
 * packet (PACKET0/1/2/3) to its checker/relocator, then copy the checked
 * stream into the hardware IB.
 *
 * NOTE(review): gappy extraction — several case bodies, returns and
 * braces are missing from this view.
 */
419 int radeon_cs_parse(struct drm_radeon_cs_parser *parser)
422 struct drm_radeon_kernel_chunk *ib_chunk;
423 /* scan the packet for various things */
424 int count_dw = 0, size_dw;
427 ib_chunk = &parser->chunks[parser->ib_index];
428 size_dw = ib_chunk->length_dw;
430 while (count_dw < size_dw && ret == 0) {
431 int hdr = ib_chunk->kdata[count_dw];
432 int num_dw = (hdr & RADEON_CP_PACKET_COUNT_MASK) >> 16;
/* top two bits of the header select the packet type */
435 switch (hdr & RADEON_CP_PACKET_MASK) {
436 case RADEON_CP_PACKET0:
437 ret = radeon_cs_packet0(parser, count_dw);
439 case RADEON_CP_PACKET1:
440 case RADEON_CP_PACKET2:
441 reg = hdr & RADEON_CP_PACKET0_REG_MASK;
442 DRM_DEBUG("Packet 1/2: %d %x\n", num_dw, reg);
445 case RADEON_CP_PACKET3:
/* HOSTDATA_BLT embeds an offset that needs relocation */
449 case RADEON_CNTL_HOSTDATA_BLT:
450 radeon_cs_relocate_packet3(parser, count_dw);
/* these carry GPU addresses but relocation is not implemented yet */
453 case RADEON_CNTL_BITBLT_MULTI:
454 case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */
455 case RADEON_CP_INDX_BUFFER:
456 DRM_ERROR("need relocate packet 3 for %x\n", reg);
459 case RADEON_3D_DRAW_IMMD: /* triggers drawing using in-packet vertex data */
460 case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */
461 case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */
462 case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */
463 case RADEON_WAIT_FOR_IDLE:
467 DRM_ERROR("unknown packet 3 %x\n", reg);
/* advance past header + payload */
473 count_dw += num_dw+2;
480 /* copy the packet into the IB */
481 memcpy(parser->ib, ib_chunk->kdata, ib_chunk->length_dw * sizeof(uint32_t));
483 /* read back last byte to flush WC buffers */
484 rb = readl((parser->ib + (ib_chunk->length_dw-1) * sizeof(uint32_t)));
/*
 * radeon_cs_id_get - allocate the next command-stream id.
 *
 * The id packs a 24-bit sequence counter (low bits, wraps at 0x00FFFFFF,
 * restarts at 1 so 0 stays "invalid") with a wrap counter in the top
 * 8 bits.
 */
489 uint32_t radeon_cs_id_get(struct drm_radeon_private *radeon)
491 /* FIXME: protect with a spinlock */
492 /* FIXME: check if wrap affect last reported wrap & sequence */
493 radeon->cs.id_scnt = (radeon->cs.id_scnt + 1) & 0x00FFFFFF;
494 if (!radeon->cs.id_scnt) {
495 /* increment wrap counter */
496 radeon->cs.id_wcnt += 0x01000000;
497 /* valid sequence counter start at 1 */
498 radeon->cs.id_scnt = 1;
500 return (radeon->cs.id_scnt | radeon->cs.id_wcnt);
/*
 * r100_cs_id_emit - schedule the IB on r100-class hardware and emit the
 * cs id into SCRATCH4, followed by an IRQ fire for fencing.
 *
 * NOTE(review): gappy extraction — BEGIN_RING/ADVANCE_RING bracketing is
 * not visible here.
 */
503 void r100_cs_id_emit(struct drm_radeon_cs_parser *parser, uint32_t *id)
505 drm_radeon_private_t *dev_priv = parser->dev->dev_private;
508 dev_priv->irq_emitted = radeon_update_breadcrumb(parser->dev);
509 /* ISYNC_CNTL should have CPSCRACTH bit set */
510 *id = radeon_cs_id_get(dev_priv);
511 /* emit id in SCRATCH4 (not used yet in old drm) */
/* point the CP at the IB: base address then length in dwords */
513 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
514 OUT_RING(parser->card_offset);
515 OUT_RING(parser->chunks[parser->ib_index].length_dw);
516 OUT_RING(CP_PACKET2());
517 OUT_RING(CP_PACKET0(RADEON_SCRATCH_REG4, 0));
519 OUT_RING_REG(RADEON_LAST_SWI_REG, dev_priv->irq_emitted);
520 OUT_RING_REG(RADEON_GEN_INT_STATUS, RADEON_SW_INT_FIRE);
/*
 * r300_cs_id_emit - schedule the IB on r300-class hardware, flush the
 * destination cache, and emit the cs id plus an inline breadcrumb and
 * software interrupt for TTM fencing.
 *
 * NOTE(review): gappy extraction — BEGIN_RING/ADVANCE_RING bracketing is
 * not visible here.
 */
526 void r300_cs_id_emit(struct drm_radeon_cs_parser *parser, uint32_t *id)
528 drm_radeon_private_t *dev_priv = parser->dev->dev_private;
532 dev_priv->irq_emitted = radeon_update_breadcrumb(parser->dev);
534 /* ISYNC_CNTL should not have CPSCRACTH bit set */
535 *id = radeon_cs_id_get(dev_priv);
537 /* emit id in SCRATCH6 */
/* point the CP at the IB: base address then length in dwords */
539 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
540 OUT_RING(parser->card_offset);
541 OUT_RING(parser->chunks[parser->ib_index].length_dw);
542 OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
544 for (i = 0; i < 11; i++) /* emit fillers like fglrx */
545 OUT_RING(CP_PACKET2());
/* flush + finish the dst cache before fencing */
550 OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH);
551 OUT_RING(CP_PACKET0(R300_CP_RESYNC_ADDR, 1));
554 OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FINISH|R300_RB3D_DC_FLUSH);
555 /* emit inline breadcrumb for TTM fencing */
557 RADEON_WAIT_UNTIL_3D_IDLE();
558 OUT_RING_REG(RADEON_LAST_SWI_REG, dev_priv->irq_emitted);
560 OUT_RING(CP_PACKET0(R300_CP_RESYNC_ADDR, 1));
561 OUT_RING(3); /* breadcrumb register */
562 OUT_RING(dev_priv->irq_emitted);
563 OUT_RING(CP_PACKET2());
565 OUT_RING_REG(RADEON_GEN_INT_STATUS, RADEON_SW_INT_FIRE);
566 OUT_RING(CP_PACKET2());
567 OUT_RING(CP_PACKET2());
568 OUT_RING(CP_PACKET2());
/* r100_cs_id_last_get - read back the last emitted cs id (SCRATCH4,
 * matching r100_cs_id_emit's write target). */
574 uint32_t r100_cs_id_last_get(struct drm_device *dev)
576 drm_radeon_private_t *dev_priv = dev->dev_private;
578 return GET_SCRATCH(4);
/* r300_cs_id_last_get - read back the last emitted cs id (SCRATCH6,
 * matching r300_cs_id_emit's write target). */
581 uint32_t r300_cs_id_last_get(struct drm_device *dev)
583 drm_radeon_private_t *dev_priv = dev->dev_private;
585 return GET_SCRATCH(6);
/*
 * radeon_cs_init - wire up the per-chip cs callback table: id emit/read
 * (r100 vs r300 class), the common parser, and the no-memory-manager
 * relocation path.
 *
 * NOTE(review): the function continues past the end of this listing.
 */
588 int radeon_cs_init(struct drm_device *dev)
590 drm_radeon_private_t *dev_priv = dev->dev_private;
/* pre-RV280 chips use the r100 scratch-register scheme */
592 if (dev_priv->chip_family < CHIP_RV280) {
593 dev_priv->cs.id_emit = r100_cs_id_emit;
594 dev_priv->cs.id_last_get = r100_cs_id_last_get;
595 } else if (dev_priv->chip_family < CHIP_R600) {
596 dev_priv->cs.id_emit = r300_cs_id_emit;
597 dev_priv->cs.id_last_get = r300_cs_id_last_get;
600 dev_priv->cs.parse = radeon_cs_parse;
601 /* ib get depends on memory manager or not so memory manager */
602 dev_priv->cs.relocate = radeon_nomm_relocate;