From adab7f16ffd3ea4c52e8d07f40ca6ae4868c3706 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 2 Nov 2017 00:05:15 +0100 Subject: [PATCH] radeonsi: don't map big VRAM buffers for the first upload directly MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeon/r600_buffer_common.c | 20 ++++++++++++++++++++ src/gallium/drivers/radeon/r600_pipe_common.h | 1 + 2 files changed, 21 insertions(+) diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 92521f47792..519e52ecfa3 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -194,6 +194,7 @@ void si_init_resource_fields(struct r600_common_screen *rscreen, if (res->domains & RADEON_DOMAIN_VRAM) { res->vram_usage = size; + res->max_forced_staging_uploads = res->b.max_forced_staging_uploads = rscreen->info.has_dedicated_vram && size >= rscreen->info.vram_vis_size / 4 ? 1 : 0; @@ -295,6 +296,7 @@ void si_replace_buffer_storage(struct pipe_context *ctx, rdst->gpu_address = rsrc->gpu_address; rdst->b.b.bind = rsrc->b.b.bind; rdst->b.max_forced_staging_uploads = rsrc->b.max_forced_staging_uploads; + rdst->max_forced_staging_uploads = rsrc->max_forced_staging_uploads; rdst->flags = rsrc->flags; assert(rdst->vram_usage == rsrc->vram_usage); @@ -402,6 +404,23 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; } + /* If a buffer in VRAM is too large and the range is discarded, don't + * map it directly. This makes sure that the buffer stays in VRAM. + */ + bool force_discard_range = false; + if (usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_DISCARD_RANGE) && + !(usage & PIPE_TRANSFER_PERSISTENT) && + /* Try not to decrement the counter if it's not positive. Still racy, + * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */ + rbuffer->max_forced_staging_uploads > 0 && + p_atomic_dec_return(&rbuffer->max_forced_staging_uploads) >= 0) { + usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_UNSYNCHRONIZED); + usage |= PIPE_TRANSFER_DISCARD_RANGE; + force_discard_range = true; + } + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE && !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | TC_TRANSFER_MAP_NO_INVALIDATE))) { @@ -427,6 +446,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, /* Check if mapping this buffer would cause waiting for the GPU. */ if (rbuffer->flags & RADEON_FLAG_SPARSE || + force_discard_range || si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) || !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { /* Do a wait-free write-only transfer using a temporary buffer. */ diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index a45921e72b6..a7fec373fcf 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -174,6 +174,7 @@ struct r600_resource { enum radeon_bo_domain domains; enum radeon_bo_flag flags; unsigned bind_history; + int max_forced_staging_uploads; /* The buffer range which is initialized (with a write transfer, * streamout, DMA, or as a random access target). The rest of -- 2.11.0