src/mesa/drivers/dri/radeon/radeon_dma.c

   1 /**************************************************************************
   2
   3 Copyright (C) 2004 Nicolai Haehnle.
   4 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
   5
   6 The Weather Channel (TM) funded Tungsten Graphics to develop the
   7 initial release of the Radeon 8500 driver under the XFree86 license.
   8 This notice must be preserved.
   9
  10 All Rights Reserved.
  11
  12 Permission is hereby granted, free of charge, to any person obtaining a
  13 copy of this software and associated documentation files (the "Software"),
  14 to deal in the Software without restriction, including without limitation
  15 on the rights to use, copy, modify, merge, publish, distribute, sub
  16 license, and/or sell copies of the Software, and to permit persons to whom
  17 the Software is furnished to do so, subject to the following conditions:
  18
  19 The above copyright notice and this permission notice (including the next
  20 paragraph) shall be included in all copies or substantial portions of the
  21 Software.
  22
  23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  26 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  27 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  28 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  29 USE OR OTHER DEALINGS IN THE SOFTWARE.
  30
  31 **************************************************************************/
  32
  33 #include <errno.h>
  34 #include "radeon_common.h"
  35 #include "radeon_fog.h"
  36 #include "main/simple_list.h"
  37
  38 #if defined(USE_X86_ASM)
  39 #define COPY_DWORDS( dst, src, nr )                                     \
  40 do {                                                                    \
  41         int __tmp;                                                      \
  42         __asm__ __volatile__( "rep ; movsl"                             \
  43                               : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
  44                               : "0" (nr),                               \
  45                                 "D" ((long)dst),                        \
  46                                 "S" ((long)src) );                      \
  47 } while (0)
  48 #else
  49 #define COPY_DWORDS( dst, src, nr )             \
  50 do {                                            \
  51    int j;                                       \
  52    for ( j = 0 ; j < nr ; j++ )                 \
  53       dst[j] = ((int *)src)[j];                 \
  54    dst += nr;                                   \
  55 } while (0)
  56 #endif
  57
  58 void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
  59 {
  60         int i;
  61
  62         if (RADEON_DEBUG & RADEON_VERTS)
  63                 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
  64                         __FUNCTION__, count, stride, (void *)out, (void *)data);
  65
  66         if (stride == 4)
  67                 COPY_DWORDS(out, data, count);
  68         else
  69                 for (i = 0; i < count; i++) {
  70                         out[0] = *(int *)data;
  71                         out++;
  72                         data += stride;
  73                 }
  74 }
  75
  76 void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
  77 {
  78         int i;
  79
  80         if (RADEON_DEBUG & RADEON_VERTS)
  81                 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
  82                         __FUNCTION__, count, stride, (void *)out, (void *)data);
  83
  84         if (stride == 8)
  85                 COPY_DWORDS(out, data, count * 2);
  86         else
  87                 for (i = 0; i < count; i++) {
  88                         out[0] = *(int *)data;
  89                         out[1] = *(int *)(data + 4);
  90                         out += 2;
  91                         data += stride;
  92                 }
  93 }
  94
  95 void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
  96 {
  97         int i;
  98
  99         if (RADEON_DEBUG & RADEON_VERTS)
 100                 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
 101                         __FUNCTION__, count, stride, (void *)out, (void *)data);
 102
 103         if (stride == 12) {
 104                 COPY_DWORDS(out, data, count * 3);
 105     }
 106         else
 107                 for (i = 0; i < count; i++) {
 108                         out[0] = *(int *)data;
 109                         out[1] = *(int *)(data + 4);
 110                         out[2] = *(int *)(data + 8);
 111                         out += 3;
 112                         data += stride;
 113                 }
 114 }
 115
 116 void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
 117 {
 118         int i;
 119
 120         if (RADEON_DEBUG & RADEON_VERTS)
 121                 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
 122                         __FUNCTION__, count, stride, (void *)out, (void *)data);
 123
 124         if (stride == 16)
 125                 COPY_DWORDS(out, data, count * 4);
 126         else
 127                 for (i = 0; i < count; i++) {
 128                         out[0] = *(int *)data;
 129                         out[1] = *(int *)(data + 4);
 130                         out[2] = *(int *)(data + 8);
 131                         out[3] = *(int *)(data + 12);
 132                         out += 4;
 133                         data += stride;
 134                 }
 135 }
 136
 137 void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
 138                          const GLvoid * data, int size, int stride, int count)
 139 {
 140         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 141         uint32_t *out;
 142
 143         if (stride == 0) {
 144                 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
 145                 count = 1;
 146                 aos->stride = 0;
 147         } else {
 148                 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
 149                 aos->stride = size;
 150         }
 151
 152         aos->components = size;
 153         aos->count = count;
 154
 155         radeon_bo_map(aos->bo, 1);
 156         out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
 157         switch (size) {
 158         case 1: radeonEmitVec4(out, data, stride, count); break;
 159         case 2: radeonEmitVec8(out, data, stride, count); break;
 160         case 3: radeonEmitVec12(out, data, stride, count); break;
 161         case 4: radeonEmitVec16(out, data, stride, count); break;
 162         default:
 163                 assert(0);
 164                 break;
 165         }
 166         radeon_bo_unmap(aos->bo);
 167 }
 168
 169 void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
 170                          GLvoid *data, int stride, int count)
 171 {
 172         int i;
 173         float *out;
 174         int size = 1;
 175         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 176
 177         if (RADEON_DEBUG & RADEON_VERTS)
 178                 fprintf(stderr, "%s count %d stride %d\n",
 179                         __FUNCTION__, count, stride);
 180
 181         if (stride == 0) {
 182                 radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
 183                 count = 1;
 184                 aos->stride = 0;
 185         } else {
 186                 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
 187                 aos->stride = size;
 188         }
 189
 190         aos->components = size;
 191         aos->count = count;
 192
 193         /* Emit the data */
 194         radeon_bo_map(aos->bo, 1);
 195         out = (float*)((char*)aos->bo->ptr + aos->offset);
 196         for (i = 0; i < count; i++) {
 197                 out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
 198                 out++;
 199                 data += stride;
 200         }
 201         radeon_bo_unmap(aos->bo);
 202 }
 203
 204 void radeon_init_dma(radeonContextPtr rmesa)
 205 {
 206         make_empty_list(&rmesa->dma.free);
 207         make_empty_list(&rmesa->dma.wait);
 208         make_empty_list(&rmesa->dma.reserved);
 209         rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
 210 }
 211
 212 void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
 213 {
 214         struct radeon_dma_bo *dma_bo = NULL;
 215         /* we set minimum sizes to at least requested size
 216            aligned to next 16 bytes. */
 217         if (size > rmesa->dma.minimum_size)
 218                 rmesa->dma.minimum_size = (size + 15) & (~15);
 219
 220         radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n",
 221                         __FUNCTION__, size, rmesa->dma.minimum_size);
 222
 223         if (is_empty_list(&rmesa->dma.free)
 224               || last_elem(&rmesa->dma.free)->bo->size < size) {
 225                 dma_bo = CALLOC_STRUCT(radeon_dma_bo);
 226                 assert(dma_bo);
 227
 228 again_alloc:
 229                 dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
 230                                             0, rmesa->dma.minimum_size, 4,
 231                                             RADEON_GEM_DOMAIN_GTT, 0);
 232
 233                 if (!dma_bo->bo) {
 234                         rcommonFlushCmdBuf(rmesa, __FUNCTION__);
 235                         goto again_alloc;
 236                 }
 237                 insert_at_head(&rmesa->dma.reserved, dma_bo);
 238         } else {
 239                 /* We push and pop buffers from end of list so we can keep
 240                    counter on unused buffers for later freeing them from
 241                    begin of list */
 242                 dma_bo = last_elem(&rmesa->dma.free);
 243                 remove_from_list(dma_bo);
 244                 insert_at_head(&rmesa->dma.reserved, dma_bo);
 245         }
 246
 247         rmesa->dma.current_used = 0;
 248         rmesa->dma.current_vertexptr = 0;
 249
 250         if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
 251                                           first_elem(&rmesa->dma.reserved)->bo,
 252                                           RADEON_GEM_DOMAIN_GTT, 0))
 253                 fprintf(stderr,"failure to revalidate BOs - badness\n");
 254
 255         if (is_empty_list(&rmesa->dma.reserved)) {
 256         /* Cmd buff have been flushed in radeon_revalidate_bos */
 257                 goto again_alloc;
 258         }
 259         radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
 260 }
 261
 262 /* Allocates a region from rmesa->dma.current.  If there isn't enough
 263  * space in current, grab a new buffer (and discard what was left of current)
 264  */
 265 void radeonAllocDmaRegion(radeonContextPtr rmesa,
 266                           struct radeon_bo **pbo, int *poffset,
 267                           int bytes, int alignment)
 268 {
 269         if (RADEON_DEBUG & RADEON_IOCTL)
 270                 fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
 271
 272         if (rmesa->dma.flush)
 273                 rmesa->dma.flush(rmesa->glCtx);
 274
 275         assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
 276
 277         alignment--;
 278         rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
 279
 280         if (is_empty_list(&rmesa->dma.reserved)
 281                 || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
 282                 radeonRefillCurrentDmaRegion(rmesa, bytes);
 283
 284         *poffset = rmesa->dma.current_used;
 285         *pbo = first_elem(&rmesa->dma.reserved)->bo;
 286         radeon_bo_ref(*pbo);
 287
 288         /* Always align to at least 16 bytes */
 289         rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
 290         rmesa->dma.current_vertexptr = rmesa->dma.current_used;
 291
 292         assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
 293 }
 294
 295 void radeonFreeDmaRegions(radeonContextPtr rmesa)
 296 {
 297         struct radeon_dma_bo *dma_bo;
 298         struct radeon_dma_bo *temp;
 299         if (RADEON_DEBUG & RADEON_DMA)
 300                 fprintf(stderr, "%s\n", __FUNCTION__);
 301
 302         foreach_s(dma_bo, temp, &rmesa->dma.free) {
 303                 remove_from_list(dma_bo);
 304                 radeon_bo_unref(dma_bo->bo);
 305                 FREE(dma_bo);
 306         }
 307
 308         foreach_s(dma_bo, temp, &rmesa->dma.wait) {
 309                 remove_from_list(dma_bo);
 310                 radeon_bo_unref(dma_bo->bo);
 311                 FREE(dma_bo);
 312         }
 313
 314         foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
 315                 remove_from_list(dma_bo);
 316                 radeon_bo_unref(dma_bo->bo);
 317                 FREE(dma_bo);
 318         }
 319 }
 320
 321 void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
 322 {
 323         if (is_empty_list(&rmesa->dma.reserved))
 324                 return;
 325
 326         if (RADEON_DEBUG & RADEON_IOCTL)
 327                 fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
 328         rmesa->dma.current_used -= return_bytes;
 329         rmesa->dma.current_vertexptr = rmesa->dma.current_used;
 330 }
 331
 332 static int radeon_bo_is_idle(struct radeon_bo* bo)
 333 {
 334         uint32_t domain;
 335         int ret = radeon_bo_is_busy(bo, &domain);
 336         if (ret == -EINVAL) {
 337                 WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
 338                         "This may cause small performance drop for you.\n");
 339         }
 340         return ret != -EBUSY;
 341 }
 342
 343 void radeonReleaseDmaRegions(radeonContextPtr rmesa)
 344 {
 345         struct radeon_dma_bo *dma_bo;
 346         struct radeon_dma_bo *temp;
 347         const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
 348         const int time = rmesa->dma.free.expire_counter;
 349
 350         if (RADEON_DEBUG & RADEON_DMA) {
 351                 size_t free = 0,
 352                        wait = 0,
 353                        reserved = 0;
 354                 foreach(dma_bo, &rmesa->dma.free)
 355                         ++free;
 356
 357                 foreach(dma_bo, &rmesa->dma.wait)
 358                         ++wait;
 359
 360                 foreach(dma_bo, &rmesa->dma.reserved)
 361                         ++reserved;
 362
 363                 fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
 364                       __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
 365         }
 366
 367         /* move waiting bos to free list.
 368            wait list provides gpu time to handle data before reuse */
 369         foreach_s(dma_bo, temp, &rmesa->dma.wait) {
 370                 if (dma_bo->expire_counter == time) {
 371                         WARN_ONCE("Leaking dma buffer object!\n");
 372                         radeon_bo_unref(dma_bo->bo);
 373                         remove_from_list(dma_bo);
 374                         FREE(dma_bo);
 375                         continue;
 376                 }
 377                 /* free objects that are too small to be used because of large request */
 378                 if (dma_bo->bo->size < rmesa->dma.minimum_size) {
 379                    radeon_bo_unref(dma_bo->bo);
 380                    remove_from_list(dma_bo);
 381                    FREE(dma_bo);
 382                    continue;
 383                 }
 384                 if (!radeon_bo_is_idle(dma_bo->bo)) {
 385                         break;
 386                 }
 387                 remove_from_list(dma_bo);
 388                 dma_bo->expire_counter = expire_at;
 389                 insert_at_tail(&rmesa->dma.free, dma_bo);
 390         }
 391
 392         /* move reserved to wait list */
 393         foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
 394                 radeon_bo_unmap(dma_bo->bo);
 395                 /* free objects that are too small to be used because of large request */
 396                 if (dma_bo->bo->size < rmesa->dma.minimum_size) {
 397                    radeon_bo_unref(dma_bo->bo);
 398                    remove_from_list(dma_bo);
 399                    FREE(dma_bo);
 400                    continue;
 401                 }
 402                 remove_from_list(dma_bo);
 403                 dma_bo->expire_counter = expire_at;
 404                 insert_at_tail(&rmesa->dma.wait, dma_bo);
 405         }
 406
 407         /* free bos that have been unused for some time */
 408         foreach_s(dma_bo, temp, &rmesa->dma.free) {
 409                 if (dma_bo->expire_counter != time)
 410                         break;
 411                 remove_from_list(dma_bo);
 412                 radeon_bo_unref(dma_bo->bo);
 413                 FREE(dma_bo);
 414         }
 415
 416 }
 417
 418
 419 /* Flush vertices in the current dma region.
 420  */
 421 void rcommon_flush_last_swtcl_prim( struct gl_context *ctx  )
 422 {
 423         radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 424         struct radeon_dma *dma = &rmesa->dma;
 425
 426         if (RADEON_DEBUG & RADEON_IOCTL)
 427                 fprintf(stderr, "%s\n", __FUNCTION__);
 428         dma->flush = NULL;
 429
 430         radeon_bo_unmap(rmesa->swtcl.bo);
 431
 432         if (!is_empty_list(&dma->reserved)) {
 433             GLuint current_offset = dma->current_used;
 434
 435             assert (dma->current_used +
 436                     rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
 437                     dma->current_vertexptr);
 438
 439             if (dma->current_used != dma->current_vertexptr) {
 440                     dma->current_used = dma->current_vertexptr;
 441
 442                     rmesa->vtbl.swtcl_flush(ctx, current_offset);
 443             }
 444             rmesa->swtcl.numverts = 0;
 445         }
 446         radeon_bo_unref(rmesa->swtcl.bo);
 447         rmesa->swtcl.bo = NULL;
 448 }
 449 /* Alloc space in the current dma region.
 450  */
 451 void *
 452 rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
 453 {
 454         GLuint bytes = vsize * nverts;
 455         void *head;
 456         if (RADEON_DEBUG & RADEON_IOCTL)
 457                 fprintf(stderr, "%s\n", __FUNCTION__);
 458
 459         if(is_empty_list(&rmesa->dma.reserved)
 460               ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
 461                 if (rmesa->dma.flush) {
 462                         rmesa->dma.flush(rmesa->glCtx);
 463                 }
 464
 465                 radeonRefillCurrentDmaRegion(rmesa, bytes);
 466
 467                 return NULL;
 468         }
 469
 470         if (!rmesa->dma.flush) {
 471                 /* if cmdbuf flushed DMA restart */
 472                 rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
 473                 rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
 474         }
 475
 476         ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
 477         ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
 478         ASSERT( rmesa->dma.current_used +
 479                 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
 480                 rmesa->dma.current_vertexptr );
 481
 482         if (!rmesa->swtcl.bo) {
 483                 rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
 484                 radeon_bo_ref(rmesa->swtcl.bo);
 485                 radeon_bo_map(rmesa->swtcl.bo, 1);
 486         }
 487
 488         head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
 489         rmesa->dma.current_vertexptr += bytes;
 490         rmesa->swtcl.numverts += nverts;
 491         return head;
 492 }
 493
 494 void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
 495 {
 496    radeonContextPtr radeon = RADEON_CONTEXT( ctx );
 497    int i;
 498         if (RADEON_DEBUG & RADEON_IOCTL)
 499                 fprintf(stderr, "%s\n", __FUNCTION__);
 500
 501    if (radeon->dma.flush) {
 502        radeon->dma.flush(radeon->glCtx);
 503    }
 504    for (i = 0; i < radeon->tcl.aos_count; i++) {
 505       if (radeon->tcl.aos[i].bo) {
 506          radeon_bo_unref(radeon->tcl.aos[i].bo);
 507          radeon->tcl.aos[i].bo = NULL;
 508
 509       }
 510    }
 511 }