From 9adcd2d80aceec90b9c3712b53d8e7839dc5634b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Jun 2015 12:36:47 -0700 Subject: [PATCH] vc4: Move RCL generation into the kernel. There weren't that many variations of RCL generation, and this lets us skip all the in-kernel validation for what we generated. --- src/gallium/drivers/vc4/Makefile.sources | 1 + src/gallium/drivers/vc4/kernel/vc4_drv.h | 28 +- src/gallium/drivers/vc4/kernel/vc4_gem.c | 70 ++-- src/gallium/drivers/vc4/kernel/vc4_render_cl.c | 446 +++++++++++++++++++++++++ src/gallium/drivers/vc4/kernel/vc4_validate.c | 306 +++-------------- src/gallium/drivers/vc4/vc4_blit.c | 107 ++---- src/gallium/drivers/vc4/vc4_context.c | 289 ++-------------- src/gallium/drivers/vc4/vc4_context.h | 15 +- src/gallium/drivers/vc4/vc4_draw.c | 2 + src/gallium/drivers/vc4/vc4_drm.h | 40 ++- src/gallium/drivers/vc4/vc4_job.c | 97 +++++- 11 files changed, 725 insertions(+), 676 deletions(-) create mode 100644 src/gallium/drivers/vc4/kernel/vc4_render_cl.c diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index edef49353a2..1eb029e67e7 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -2,6 +2,7 @@ C_SOURCES := \ kernel/vc4_drv.h \ kernel/vc4_gem.c \ kernel/vc4_packet.h \ + kernel/vc4_render_cl.c \ kernel/vc4_validate.c \ kernel/vc4_validate_shaders.c \ vc4_blit.c \ diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index 8e9230b8949..83802dd774a 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -88,14 +88,9 @@ struct vc4_exec_info { uint32_t shader_state_count; bool found_tile_binning_mode_config_packet; - bool found_tile_rendering_mode_config_packet; bool found_start_tile_binning_packet; bool found_increment_semaphore_packet; - bool found_wait_on_semaphore_packet; uint8_t bin_tiles_x, bin_tiles_y; - uint32_t fb_width, fb_height; - uint32_t tile_alloc_init_block_mask; - uint32_t tile_alloc_init_block_last; struct drm_gem_cma_object *tile_alloc_bo; /** @@ -163,13 +158,10 @@ struct vc4_validated_shader_info /* vc4_validate.c */ int -vc4_validate_cl(struct drm_device *dev, - void *validated, - void *unvalidated, - uint32_t len, - bool is_bin, - bool has_bin, - struct vc4_exec_info *exec); +vc4_validate_bin_cl(struct drm_device *dev, + void *validated, + void *unvalidated, + struct vc4_exec_info *exec); int vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec); @@ -177,4 +169,16 @@ vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec); struct vc4_validated_shader_info * vc4_validate_shader(struct drm_gem_cma_object *shader_obj); +bool vc4_use_bo(struct vc4_exec_info *exec, + uint32_t hindex, + enum vc4_bo_mode mode, + struct drm_gem_cma_object **obj); + +int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec); + +bool vc4_check_tex_size(struct vc4_exec_info *exec, + struct drm_gem_cma_object *fbo, + uint32_t offset, uint8_t tiling_format, + uint32_t width, uint32_t height, uint8_t cpp); + #endif /* VC4_DRV_H */ diff --git a/src/gallium/drivers/vc4/kernel/vc4_gem.c b/src/gallium/drivers/vc4/kernel/vc4_gem.c index e559ddd1d4e..e4b7fea5968 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_gem.c +++ b/src/gallium/drivers/vc4/kernel/vc4_gem.c @@ -25,24 +25,26 @@ #include "vc4_drv.h" -int -vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) +/* + * Copies in the user's binning command list and generates the validated bin + * CL, along with associated data (shader records, uniforms). + */ +static int +vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) { struct drm_vc4_submit_cl *args = exec->args; void *temp = NULL; - void *bin, *render; + void *bin; int ret = 0; uint32_t bin_offset = 0; - uint32_t render_offset = bin_offset + args->bin_cl_size; - uint32_t shader_rec_offset = roundup(render_offset + - args->render_cl_size, 16); + uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size, + 16); uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size; uint32_t exec_size = uniforms_offset + args->uniforms_size; uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) * args->shader_rec_count); - if (shader_rec_offset < render_offset || - uniforms_offset < shader_rec_offset || + if (uniforms_offset < shader_rec_offset || exec_size < uniforms_offset || args->shader_rec_count >= (UINT_MAX / sizeof(struct vc4_shader_state)) || @@ -66,7 +68,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) goto fail; } bin = temp + bin_offset; - render = temp + render_offset; exec->shader_rec_u = temp + shader_rec_offset; exec->uniforms_u = temp + uniforms_offset; exec->shader_state = temp + exec_size; @@ -80,14 +81,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) goto fail; } - ret = copy_from_user(render, - (void __user *)(uintptr_t)args->render_cl, - args->render_cl_size); - if (ret) { - DRM_ERROR("Failed to copy in render cl\n"); - goto fail; - } - ret = copy_from_user(exec->shader_rec_u, (void __user *)(uintptr_t)args->shader_rec, args->shader_rec_size); @@ -118,7 +111,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) &exec->unref_list); exec->ct0ca = exec->exec_bo->paddr + bin_offset; - exec->ct1ca = exec->exec_bo->paddr + render_offset; exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset; exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset; @@ -128,23 +120,10 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset; exec->uniforms_size = args->uniforms_size; - ret = vc4_validate_cl(dev, - exec->exec_bo->vaddr + bin_offset, - bin, - args->bin_cl_size, - true, - args->bin_cl_size != 0, - exec); - if (ret) - goto fail; - - ret = vc4_validate_cl(dev, - exec->exec_bo->vaddr + render_offset, - render, - args->render_cl_size, - false, - args->bin_cl_size != 0, - exec); + ret = vc4_validate_bin_cl(dev, + exec->exec_bo->vaddr + bin_offset, + bin, + exec); if (ret) goto fail; @@ -155,4 +134,25 @@ fail: return ret; } +int +vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) +{ + int ret = 0; + + if (exec->args->bin_cl_size != 0) { + ret = vc4_get_bcl(dev, exec); + if (ret) + goto fail; + } else { + exec->ct0ca = exec->ct0ea = 0; + } + + ret = vc4_get_rcl(dev, exec); + if (ret) + goto fail; + +fail: + return ret; +} + #endif /* USE_VC4_SIMULATOR */ diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c new file mode 100644 index 00000000000..de6070fec72 --- /dev/null +++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c @@ -0,0 +1,446 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * DOC: Render command list generation + * + * In the VC4 driver, render command list generation is performed by the + * kernel instead of userspace. We do this because validating a + * user-submitted command list is hard to get right and has high CPU overhead, + * while the number of valid configurations for render command lists is + * actually fairly low. + */ + +#include "vc4_drv.h" +#include "vc4_packet.h" + +struct vc4_rcl_setup { + struct drm_gem_cma_object *color_read; + struct drm_gem_cma_object *color_ms_write; + struct drm_gem_cma_object *zs_read; + struct drm_gem_cma_object *zs_write; + + struct drm_gem_cma_object *rcl; + u32 next_offset; +}; + +static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val) +{ + *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 1; +} + +static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val) +{ + *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 2; +} + +static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val) +{ + *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 4; +} + + +/* + * Emits a no-op STORE_TILE_BUFFER_GENERAL. + * + * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of + * some sort before another load is triggered. + */ +static void vc4_store_before_load(struct vc4_rcl_setup *setup) +{ + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR); + rcl_u32(setup, 0); /* no address, since we're in None mode */ +} + +/* + * Emits a PACKET_TILE_COORDINATES if one isn't already pending. + * + * The tile coordinates packet triggers a pending load if there is one, are + * used for clipping during rendering, and determine where loads/stores happen + * relative to their base address. + */ +static void vc4_tile_coordinates(struct vc4_rcl_setup *setup, + uint32_t x, uint32_t y) +{ + rcl_u8(setup, VC4_PACKET_TILE_COORDINATES); + rcl_u8(setup, x); + rcl_u8(setup, y); +} + +static void emit_tile(struct vc4_exec_info *exec, + struct vc4_rcl_setup *setup, + uint8_t x, uint8_t y, bool first, bool last) +{ + bool has_bin = exec->args->bin_cl_size != 0; + + /* Note that the load doesn't actually occur until the + * tile coords packet is processed, and only one load + * may be outstanding at a time. + */ + if (setup->color_read) { + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + rcl_u16(setup, exec->args->color_read.bits); + rcl_u32(setup, + setup->color_read->paddr + + exec->args->color_read.offset); + } + + if (setup->zs_read) { + if (setup->color_read) { + /* Exec previous load. */ + vc4_tile_coordinates(setup, x, y); + vc4_store_before_load(setup); + } + + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + rcl_u16(setup, exec->args->zs_read.bits); + rcl_u32(setup, + setup->zs_read->paddr + exec->args->zs_read.offset); + } + + /* Clipping depends on tile coordinates having been + * emitted, so we always need one here. + */ + vc4_tile_coordinates(setup, x, y); + + /* Wait for the binner before jumping to the first + * tile's lists. + */ + if (first && has_bin) + rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE); + + if (has_bin) { + rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); + rcl_u32(setup, (exec->tile_alloc_bo->paddr + + (y * exec->bin_tiles_x + x) * 32)); + } + + if (setup->zs_write) { + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, exec->args->zs_write.bits | + (setup->color_ms_write ? + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0)); + rcl_u32(setup, + (setup->zs_write->paddr + exec->args->zs_write.offset) | + ((last && !setup->color_ms_write) ? + VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); + } + + if (setup->color_ms_write) { + if (setup->zs_write) { + /* Reset after previous store */ + vc4_tile_coordinates(setup, x, y); + } + + if (last) + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); + else + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER); + } +} + +static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, + struct vc4_rcl_setup *setup) +{ + bool has_bin = exec->args->bin_cl_size != 0; + uint8_t min_x_tile = exec->args->min_x_tile; + uint8_t min_y_tile = exec->args->min_y_tile; + uint8_t max_x_tile = exec->args->max_x_tile; + uint8_t max_y_tile = exec->args->max_y_tile; + uint8_t xtiles = max_x_tile - min_x_tile + 1; + uint8_t ytiles = max_y_tile - min_y_tile + 1; + uint8_t x, y; + uint32_t size, loop_body_size; + + size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; + loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; + + if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { + size += VC4_PACKET_CLEAR_COLORS_SIZE + + VC4_PACKET_TILE_COORDINATES_SIZE + + VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + } + + if (setup->color_read) { + loop_body_size += (VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE); + } + if (setup->zs_read) { + if (setup->color_read) { + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; + loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + } + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; + } + + if (has_bin) { + size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE; + loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE; + } + + if (setup->zs_write) + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; + if (setup->color_ms_write) { + if (setup->zs_write) + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; + loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE; + } + size += xtiles * ytiles * loop_body_size; + + setup->rcl = drm_gem_cma_create(dev, size); + if (!setup->rcl) + return -ENOMEM; + list_addtail(&to_vc4_bo(&setup->rcl->base)->unref_head, + &exec->unref_list); + + rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); + rcl_u32(setup, + (setup->color_ms_write ? + (setup->color_ms_write->paddr + + exec->args->color_ms_write.offset) : + 0)); + rcl_u16(setup, exec->args->width); + rcl_u16(setup, exec->args->height); + rcl_u16(setup, exec->args->color_ms_write.bits); + + /* The tile buffer gets cleared when the previous tile is stored. If + * the clear values changed between frames, then the tile buffer has + * stale clear values in it, so we have to do a store in None mode (no + * writes) so that we trigger the tile buffer clear. + */ + if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { + rcl_u8(setup, VC4_PACKET_CLEAR_COLORS); + rcl_u32(setup, exec->args->clear_color[0]); + rcl_u32(setup, exec->args->clear_color[1]); + rcl_u32(setup, exec->args->clear_z); + rcl_u8(setup, exec->args->clear_s); + + vc4_tile_coordinates(setup, 0, 0); + + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE); + rcl_u32(setup, 0); /* no address, since we're in None mode */ + } + + for (y = min_y_tile; y <= max_y_tile; y++) { + for (x = min_x_tile; x <= max_x_tile; x++) { + bool first = (x == min_x_tile && y == min_y_tile); + bool last = (x == max_x_tile && y == max_y_tile); + emit_tile(exec, setup, x, y, first, last); + } + } + + BUG_ON(setup->next_offset != size); + exec->ct1ca = setup->rcl->paddr; + exec->ct1ea = setup->rcl->paddr + setup->next_offset; + + return 0; +} + +static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, + struct drm_gem_cma_object **obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + uint8_t tiling = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_TILING); + uint8_t buffer = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_BUFFER); + uint8_t format = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_FORMAT); + int cpp; + + if (surf->pad != 0) { + DRM_ERROR("Padding unset\n"); + return -EINVAL; + } + + if (surf->hindex == ~0) + return 0; + + if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj)) + return -EINVAL; + + if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK | + VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK | + VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) { + DRM_ERROR("Unknown bits in load/store: 0x%04x\n", + surf->bits); + return -EINVAL; + } + + if (tiling > VC4_TILING_FORMAT_LT) { + DRM_ERROR("Bad tiling format\n"); + return -EINVAL; + } + + if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) { + if (format != 0) { + DRM_ERROR("No color format should be set for ZS\n"); + return -EINVAL; + } + cpp = 4; + } else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) { + switch (format) { + case VC4_LOADSTORE_TILE_BUFFER_BGR565: + case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER: + cpp = 2; + break; + case VC4_LOADSTORE_TILE_BUFFER_RGBA8888: + cpp = 4; + break; + default: + DRM_ERROR("Bad tile buffer format\n"); + return -EINVAL; + } + } else { + DRM_ERROR("Bad load/store buffer %d.\n", buffer); + return -EINVAL; + } + + if (surf->offset & 0xf) { + DRM_ERROR("load/store buffer must be 16b aligned.\n"); + return -EINVAL; + } + + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, + exec->args->width, exec->args->height, cpp)) { + return -EINVAL; + } + + return 0; +} + +static int +vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec, + struct drm_gem_cma_object **obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + uint8_t tiling = VC4_GET_FIELD(surf->bits, + VC4_RENDER_CONFIG_MEMORY_FORMAT); + uint8_t format = VC4_GET_FIELD(surf->bits, + VC4_RENDER_CONFIG_FORMAT); + int cpp; + + if (surf->pad != 0) { + DRM_ERROR("Padding unset\n"); + return -EINVAL; + } + + if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK | + VC4_RENDER_CONFIG_FORMAT_MASK)) { + DRM_ERROR("Unknown bits in render config: 0x%04x\n", + surf->bits); + return -EINVAL; + } + + if (surf->hindex == ~0) + return 0; + + if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj)) + return -EINVAL; + + if (tiling > VC4_TILING_FORMAT_LT) { + DRM_ERROR("Bad tiling format\n"); + return -EINVAL; + } + + switch (format) { + case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED: + case VC4_RENDER_CONFIG_FORMAT_BGR565: + cpp = 2; + break; + case VC4_RENDER_CONFIG_FORMAT_RGBA8888: + cpp = 4; + break; + default: + DRM_ERROR("Bad tile buffer format\n"); + return -EINVAL; + } + + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, + exec->args->width, exec->args->height, cpp)) { + return -EINVAL; + } + + return 0; +} + +int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) +{ + struct vc4_rcl_setup setup = {0}; + struct drm_vc4_submit_cl *args = exec->args; + bool has_bin = args->bin_cl_size != 0; + int ret; + + if (args->min_x_tile > args->max_x_tile || + args->min_y_tile > args->max_y_tile) { + DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n", + args->min_x_tile, args->min_y_tile, + args->max_x_tile, args->max_y_tile); + return -EINVAL; + } + + if (has_bin && + (args->max_x_tile > exec->bin_tiles_x || + args->max_y_tile > exec->bin_tiles_y)) { + DRM_ERROR("Render tiles (%d,%d) outside of bin config (%d,%d)\n", + args->max_x_tile, args->max_y_tile, + exec->bin_tiles_x, exec->bin_tiles_y); + return -EINVAL; + } + + ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read); + if (ret) + return ret; + + ret = vc4_rcl_ms_surface_setup(exec, &setup.color_ms_write, + &args->color_ms_write); + if (ret) + return ret; + + ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read); + if (ret) + return ret; + + ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write); + if (ret) + return ret; + + /* We shouldn't even have the job submitted to us if there's no + * surface to write out. + */ + if (!setup.color_ms_write && !setup.zs_write) { + DRM_ERROR("RCL requires color or Z/S write\n"); + return -EINVAL; + } + + return vc4_create_rcl_bo(dev, exec, &setup); +} diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 0a74a2c6db7..80b0e653d80 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -94,7 +94,7 @@ size_is_lt(uint32_t width, uint32_t height, int cpp) height <= 4 * utile_height(cpp)); } -static bool +bool vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex, enum vc4_bo_mode mode, @@ -147,10 +147,10 @@ gl_shader_rec_size(uint32_t pointer_bits) return 36 + attribute_count * 8; } -static bool -check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, - uint32_t offset, uint8_t tiling_format, - uint32_t width, uint32_t height, uint8_t cpp) +bool +vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, + uint32_t offset, uint8_t tiling_format, + uint32_t width, uint32_t height, uint8_t cpp) { uint32_t aligned_width, aligned_height, stride, size; uint32_t utile_w = utile_width(cpp); @@ -248,118 +248,6 @@ validate_increment_semaphore(VALIDATE_ARGS) } static int -validate_wait_on_semaphore(VALIDATE_ARGS) -{ - if (exec->found_wait_on_semaphore_packet) { - DRM_ERROR("Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n"); - return -EINVAL; - } - exec->found_wait_on_semaphore_packet = true; - - if (!exec->found_increment_semaphore_packet) { - DRM_ERROR("VC4_PACKET_WAIT_ON_SEMAPHORE without " - "VC4_PACKET_INCREMENT_SEMAPHORE\n"); - return -EINVAL; - } - - return 0; -} - -static int -validate_branch_to_sublist(VALIDATE_ARGS) -{ - uint32_t offset; - - if (!exec->tile_alloc_bo) { - DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST seen before " - "binner setup\n"); - return -EINVAL; - } - - if (!exec->found_wait_on_semaphore_packet) { - DRM_ERROR("Jumping to tile alloc before binning finished.\n"); - return -EINVAL; - } - - offset = *(uint32_t *)(untrusted + 0); - if (offset & exec->tile_alloc_init_block_mask || - offset > exec->tile_alloc_init_block_last) { - DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial " - "tile allocation space.\n"); - return -EINVAL; - } - - *(uint32_t *)(validated + 0) = exec->tile_alloc_bo->paddr + offset; - - return 0; -} - -/** - * validate_loadstore_tile_buffer_general() - Validation for - * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL and - * VC4_PACKET_STORE_TILE_BUFFER_GENERAL. - * - * The two packets are nearly the same, except for the TLB-clearing management - * bits not being present for loads. Additionally, while stores are executed - * immediately (using the current tile coordinates), loads are queued to be - * executed when the tile coordinates packet occurs. - * - * Note that coordinates packets are validated to be within the declared - * bin_x/y, which themselves are verified to match the rendering-configuration - * FB width and height (which the hardware uses to clip loads and stores). - */ -static int -validate_loadstore_tile_buffer_general(VALIDATE_ARGS) -{ - uint16_t packet_b01 = *(uint16_t *)(untrusted + 0); - struct drm_gem_cma_object *fbo; - uint32_t buffer_type = VC4_GET_FIELD(packet_b01, - VC4_LOADSTORE_TILE_BUFFER_BUFFER); - uint32_t untrusted_address, offset, cpp; - - switch (buffer_type) { - case VC4_LOADSTORE_TILE_BUFFER_NONE: - return 0; - case VC4_LOADSTORE_TILE_BUFFER_COLOR: - if (VC4_GET_FIELD(packet_b01, - VC4_LOADSTORE_TILE_BUFFER_FORMAT) == - VC4_LOADSTORE_TILE_BUFFER_RGBA8888) { - cpp = 4; - } else { - cpp = 2; - } - break; - - case VC4_LOADSTORE_TILE_BUFFER_Z: - case VC4_LOADSTORE_TILE_BUFFER_ZS: - cpp = 4; - break; - - default: - DRM_ERROR("Load/store type %d unsupported\n", buffer_type); - return -EINVAL; - } - - if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &fbo)) - return -EINVAL; - - untrusted_address = *(uint32_t *)(untrusted + 2); - offset = untrusted_address & ~0xf; - - if (!check_tex_size(exec, fbo, offset, - VC4_GET_FIELD(packet_b01, - VC4_LOADSTORE_TILE_BUFFER_TILING), - exec->fb_width, exec->fb_height, cpp)) { - return -EINVAL; - } - - *(uint32_t *)(validated + 2) = (offset + fbo->paddr + - (untrusted_address & 0xf)); - - return 0; -} - -static int validate_indexed_prim_list(VALIDATE_ARGS) { struct drm_gem_cma_object *ib; @@ -552,9 +440,6 @@ validate_tile_binning_config(VALIDATE_ARGS) tile_allocation_size); return -EINVAL; } - exec->tile_alloc_init_block_mask = tile_alloc_init_block_size - 1; - exec->tile_alloc_init_block_last = tile_alloc_init_block_size * - (exec->bin_tiles_x * exec->bin_tiles_y - 1); if (*(uint32_t *)(untrusted + 8) != 0) { DRM_ERROR("TSDA offset != 0 unsupported\n"); @@ -572,141 +457,66 @@ validate_tile_binning_config(VALIDATE_ARGS) } static int -validate_tile_rendering_mode_config(VALIDATE_ARGS) -{ - struct drm_gem_cma_object *fbo; - uint32_t flags, offset, cpp; - - if (exec->found_tile_rendering_mode_config_packet) { - DRM_ERROR("Duplicate VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n"); - return -EINVAL; - } - exec->found_tile_rendering_mode_config_packet = true; - - if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &fbo)) - return -EINVAL; - - exec->fb_width = *(uint16_t *)(untrusted + 4); - exec->fb_height = *(uint16_t *)(untrusted + 6); - - flags = *(uint16_t *)(untrusted + 8); - if (VC4_GET_FIELD(flags, VC4_RENDER_CONFIG_FORMAT) == - VC4_RENDER_CONFIG_FORMAT_RGBA8888) { - cpp = 4; - } else { - cpp = 2; - } - - offset = *(uint32_t *)untrusted; - if (!check_tex_size(exec, fbo, offset, - VC4_GET_FIELD(flags, - VC4_RENDER_CONFIG_MEMORY_FORMAT), - exec->fb_width, exec->fb_height, cpp)) { - return -EINVAL; - } - - *(uint32_t *)validated = fbo->paddr + offset; - - return 0; -} - -static int -validate_tile_coordinates(VALIDATE_ARGS) -{ - uint8_t tile_x = *(uint8_t *)(untrusted + 0); - uint8_t tile_y = *(uint8_t *)(untrusted + 1); - - if (tile_x * 64 >= exec->fb_width || tile_y * 64 >= exec->fb_height) { - DRM_ERROR("Tile coordinates %d,%d > render config %dx%d\n", - tile_x, tile_y, exec->fb_width, exec->fb_height); - return -EINVAL; - } - - return 0; -} - -static int validate_gem_handles(VALIDATE_ARGS) { memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index)); return 0; } -#define VC4_DEFINE_PACKET(packet, bin, render, name, func) \ - [packet] = { bin, render, packet ## _SIZE, name, func } +#define VC4_DEFINE_PACKET(packet, name, func) \ + [packet] = { packet ## _SIZE, name, func } static const struct cmd_info { - bool bin; - bool render; uint16_t len; const char *name; int (*func)(struct vc4_exec_info *exec, void *validated, void *untrusted); } cmd_info[] = { - VC4_DEFINE_PACKET(VC4_PACKET_HALT, 1, 1, "halt", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_NOP, 1, 1, "nop", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, 1, 1, "flush", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, 1, 0, "flush all state", validate_flush_all), - VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, 1, 0, "start tile binning", validate_start_tile_binning), - VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 1, 0, "increment semaphore", validate_increment_semaphore), - VC4_DEFINE_PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 0, 1, "wait on semaphore", validate_wait_on_semaphore), - /* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but - * we only use it from the render CL in order to jump into the tile - * allocation BO. - */ - VC4_DEFINE_PACKET(VC4_PACKET_BRANCH_TO_SUB_LIST, 0, 1, "branch to sublist", validate_branch_to_sublist), - VC4_DEFINE_PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 0, 1, "store MS resolved tile color buffer", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 0, 1, "store MS resolved tile color buffer and EOF", NULL), - - VC4_DEFINE_PACKET(VC4_PACKET_STORE_TILE_BUFFER_GENERAL, 0, 1, "Store Tile Buffer General", validate_loadstore_tile_buffer_general), - VC4_DEFINE_PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL, 0, 1, "Load Tile Buffer General", validate_loadstore_tile_buffer_general), + VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all), + VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning), + VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore), - VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 1, 1, "Indexed Primitive List", validate_indexed_prim_list), + VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list), - VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 1, 1, "Vertex Array Primitives", validate_gl_array_primitive), + VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive), /* This is only used by clipped primitives (packets 48 and 49), which * we don't support parsing yet. */ - VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, 1, 1, "primitive list format", NULL), - - VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, 1, 1, "GL Shader State", validate_gl_shader_state), - VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, 1, 1, "NV Shader State", validate_nv_shader_state), - - VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, 1, 1, "configuration bits", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, 1, 1, "flat shade flags", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, 1, 1, "point size", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, 1, 1, "line width", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, 1, 1, "RHT X boundary", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, 1, 1, "Depth Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, 1, 1, "Clip Window", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, 1, 1, "Viewport Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, 1, 1, "Clipper XY Scaling", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL), + + VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state), + VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state), + + VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL), /* Note: The docs say this was also 105, but it was 106 in the * initial userland code drop. */ - VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, 1, 1, "Clipper Z Scale and Offset", NULL), - - VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 1, 0, "tile binning configuration", validate_tile_binning_config), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 0, 1, "tile rendering mode configuration", validate_tile_rendering_mode_config), + VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config), - VC4_DEFINE_PACKET(VC4_PACKET_CLEAR_COLORS, 0, 1, "Clear Colors", NULL), - - VC4_DEFINE_PACKET(VC4_PACKET_TILE_COORDINATES, 0, 1, "Tile Coordinates", validate_tile_coordinates), - - VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, 1, 1, "GEM handles", validate_gem_handles), + VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles), }; int -vc4_validate_cl(struct drm_device *dev, - void *validated, - void *unvalidated, - uint32_t len, - bool is_bin, - bool has_bin, - struct vc4_exec_info *exec) +vc4_validate_bin_cl(struct drm_device *dev, + void *validated, + void *unvalidated, + struct vc4_exec_info *exec) { + uint32_t len = exec->args->bin_cl_size; uint32_t dst_offset = 0; uint32_t src_offset = 0; @@ -734,14 +544,6 @@ vc4_validate_cl(struct drm_device *dev, src_offset, cmd, info->name, info->len); #endif - if ((is_bin && !info->bin) || - (!is_bin && !info->render)) { - DRM_ERROR("0x%08x: packet %d (%s) invalid for %s\n", - src_offset, cmd, info->name, - is_bin ? "binner" : "render"); - return -EINVAL; - } - if (src_offset + info->len > len) { DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x " "exceeds bounds (0x%08x)\n", @@ -772,30 +574,16 @@ vc4_validate_cl(struct drm_device *dev, break; } - if (is_bin) { - exec->ct0ea = exec->ct0ca + dst_offset; + exec->ct0ea = exec->ct0ca + dst_offset; - if (has_bin && !exec->found_start_tile_binning_packet) { - DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); - return -EINVAL; - } - } else { - if (!exec->found_tile_rendering_mode_config_packet) { - DRM_ERROR("Render CL missing VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n"); - return -EINVAL; - } + if (!exec->found_start_tile_binning_packet) { + DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); + return -EINVAL; + } - /* Make sure that they actually consumed the semaphore - * increment from the bin CL. Otherwise a later submit would - * have render execute immediately. - */ - if (exec->found_wait_on_semaphore_packet != has_bin) { - DRM_ERROR("Render CL %s VC4_PACKET_WAIT_ON_SEMAPHORE\n", - exec->found_wait_on_semaphore_packet ? - "has" : "missing"); - return -EINVAL; - } - exec->ct1ea = exec->ct1ca + dst_offset; + if (!exec->found_increment_semaphore_packet) { + DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE\n"); + return -EINVAL; } return 0; @@ -910,8 +698,8 @@ reloc_tex(struct vc4_exec_info *exec, tiling_format = VC4_TILING_FORMAT_T; } - if (!check_tex_size(exec, tex, offset + cube_map_stride * 5, - tiling_format, width, height, cpp)) { + if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5, + tiling_format, width, height, cpp)) { return false; } diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c index b3811025cc1..d29e2c9c318 100644 --- a/src/gallium/drivers/vc4/vc4_blit.c +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -26,87 +26,7 @@ #include "util/u_blitter.h" #include "vc4_context.h" -static void -vc4_tile_blit_color_rcl(struct vc4_context *vc4, - struct vc4_surface *dst_surf, - struct vc4_surface *src_surf) -{ - struct vc4_resource *src = vc4_resource(src_surf->base.texture); - struct vc4_resource *dst = vc4_resource(dst_surf->base.texture); - - uint32_t min_x_tile = 0; - uint32_t min_y_tile = 0; - uint32_t max_x_tile = (dst_surf->base.width - 1) / 64; - uint32_t max_y_tile = (dst_surf->base.height - 1) / 64; - uint32_t xtiles = max_x_tile - min_x_tile + 1; - uint32_t ytiles = max_y_tile - min_y_tile + 1; - cl_ensure_space(&vc4->rcl, - (VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE + - VC4_PACKET_GEM_HANDLES_SIZE) + - xtiles * ytiles * ((VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE + - VC4_PACKET_GEM_HANDLES_SIZE) * 2 + - VC4_PACKET_TILE_COORDINATES_SIZE)); - cl_ensure_space(&vc4->bo_handles, 2 * sizeof(uint32_t)); - cl_ensure_space(&vc4->bo_pointers, 2 * sizeof(struct vc4_bo *)); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); - cl_reloc(vc4, &vc4->rcl, dst->bo, dst_surf->offset); - cl_u16(&vc4->rcl, dst_surf->base.width); - cl_u16(&vc4->rcl, dst_surf->base.height); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(dst_surf->tiling, - VC4_RENDER_CONFIG_MEMORY_FORMAT) | - VC4_SET_FIELD(vc4_rt_format_is_565(dst_surf->base.format) ? - VC4_RENDER_CONFIG_FORMAT_BGR565 : - VC4_RENDER_CONFIG_FORMAT_RGBA8888, - VC4_RENDER_CONFIG_FORMAT)); - - uint32_t src_hindex = vc4_gem_hindex(vc4, src->bo); - - for (int y = min_y_tile; y <= max_y_tile; y++) { - for (int x = min_x_tile; x <= max_x_tile; x++) { - bool end_of_frame = (x == max_x_tile && - y == max_y_tile); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_SET_FIELD(src_surf->tiling, - VC4_LOADSTORE_TILE_BUFFER_TILING) | - VC4_SET_FIELD(vc4_rt_format_is_565(src_surf->base.format) ? - VC4_LOADSTORE_TILE_BUFFER_BGR565 : - VC4_LOADSTORE_TILE_BUFFER_RGBA8888, - VC4_LOADSTORE_TILE_BUFFER_FORMAT)); - cl_reloc_hindex(&vc4->rcl, src_hindex, - src_surf->offset); - - cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); - cl_u8(&vc4->rcl, x); - cl_u8(&vc4->rcl, y); - - if (end_of_frame) { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); - } else { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER); - } - } - } - - vc4->draw_min_x = 0; - vc4->draw_min_y = 0; - vc4->draw_max_x = dst_surf->base.width; - vc4->draw_max_y = dst_surf->base.height; - - dst->writes++; - vc4->needs_flush = true; -} - -static struct vc4_surface * +static struct pipe_surface * vc4_get_blit_surface(struct pipe_context *pctx, struct pipe_resource *prsc, unsigned level) { @@ -118,7 +38,7 @@ vc4_get_blit_surface(struct pipe_context *pctx, tmpl.u.tex.first_layer = 0; tmpl.u.tex.last_layer = 0; - return vc4_surface(pctx->create_surface(pctx, prsc, &tmpl)); + return pctx->create_surface(pctx, prsc, &tmpl); } static bool @@ -142,17 +62,28 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) if (info->dst.resource->format != info->src.resource->format) return false; - struct vc4_surface *dst_surf = + vc4_flush(pctx); + + struct pipe_surface *dst_surf = vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level); - struct vc4_surface *src_surf = + struct pipe_surface *src_surf = vc4_get_blit_surface(pctx, info->src.resource, info->src.level); - vc4_flush(pctx); - vc4_tile_blit_color_rcl(vc4, dst_surf, src_surf); + pipe_surface_reference(&vc4->color_read, src_surf); + pipe_surface_reference(&vc4->color_write, dst_surf); + pipe_surface_reference(&vc4->zs_read, NULL); + pipe_surface_reference(&vc4->zs_write, NULL); + vc4->draw_min_x = 0; + vc4->draw_min_y = 0; + vc4->draw_max_x = dst_surf->width; + vc4->draw_max_y = dst_surf->height; + vc4->draw_width = dst_surf->width; + vc4->draw_height = dst_surf->height; + vc4->needs_flush = true; vc4_job_submit(vc4); - pctx->surface_destroy(pctx, &dst_surf->base); - pctx->surface_destroy(pctx, &src_surf->base); + pipe_surface_reference(&dst_surf, NULL); + pipe_surface_reference(&src_surf, NULL); return true; } diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 10b58b0d815..ebd357f7065 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -37,271 +37,12 @@ #include "vc4_context.h" #include "vc4_resource.h" -/** - * Emits a no-op STORE_TILE_BUFFER_GENERAL. - * - * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of - * some sort before another load is triggered. - */ -static void -vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted) -{ - if (!*coords_emitted) - return; - - cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | - VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | - VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR); - cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */ - - *coords_emitted = false; -} - -/** - * Emits a PACKET_TILE_COORDINATES if one isn't already pending. - * - * The tile coordinates packet triggers a pending load if there is one, are - * used for clipping during rendering, and determine where loads/stores happen - * relative to their base address. - */ -static void -vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y, - bool *coords_emitted) -{ - if (*coords_emitted) - return; - - cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); - cl_u8(&vc4->rcl, x); - cl_u8(&vc4->rcl, y); - - *coords_emitted = true; -} - -static void -vc4_setup_rcl(struct vc4_context *vc4) -{ - struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); - struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL; - struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf); - struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL; - - if (!csurf) - vc4->resolve &= ~PIPE_CLEAR_COLOR0; - if (!zsurf) - vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); - uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared; - uint32_t width = vc4->framebuffer.width; - uint32_t height = vc4->framebuffer.height; - uint32_t stride_in_tiles = align(width, 64) / 64; - - assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0); - uint32_t min_x_tile = vc4->draw_min_x / 64; - uint32_t min_y_tile = vc4->draw_min_y / 64; - uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64; - uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64; - uint32_t xtiles = max_x_tile - min_x_tile + 1; - uint32_t ytiles = max_y_tile - min_y_tile + 1; - -#if 0 - fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n", - vc4->resolve, - vc4->cleared, - resolve_uncleared); -#endif - - cl_ensure_space(&vc4->rcl, - VC4_PACKET_CLEAR_COLORS_SIZE + - (VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE + - VC4_PACKET_GEM_HANDLES_SIZE) + - (VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE + - VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE) + - VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE + - xtiles * ytiles * ((VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE + - VC4_PACKET_GEM_HANDLES_SIZE) * 4 + - VC4_PACKET_TILE_COORDINATES_SIZE * 3 + - (VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE + - VC4_PACKET_GEM_HANDLES_SIZE) + - VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE)); - - if (vc4->cleared) { - cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS); - cl_u32(&vc4->rcl, vc4->clear_color[0]); - cl_u32(&vc4->rcl, vc4->clear_color[1]); - cl_u32(&vc4->rcl, vc4->clear_depth); - cl_u8(&vc4->rcl, vc4->clear_stencil); - } - - /* The rendering mode config determines the pointer that's used for - * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel - * could handle a no-relocation rendering mode config and deny those - * packets, but instead we just tell the kernel we're doing our color - * rendering to the Z buffer, and just don't emit any of those - * packets. - */ - struct vc4_surface *render_surf = csurf ? csurf : zsurf; - struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture); - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); - cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset); - cl_u16(&vc4->rcl, width); - cl_u16(&vc4->rcl, height); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(render_surf->tiling, - VC4_RENDER_CONFIG_MEMORY_FORMAT) | - VC4_SET_FIELD((vc4_rt_format_is_565(render_surf->base.format) ? - VC4_RENDER_CONFIG_FORMAT_BGR565 : - VC4_RENDER_CONFIG_FORMAT_RGBA8888), - VC4_RENDER_CONFIG_FORMAT)); - - /* The tile buffer normally gets cleared when the previous tile is - * stored. If the clear values changed between frames, then the tile - * buffer has stale clear values in it, so we have to do a store in - * None mode (no writes) so that we trigger the tile buffer clear. - * - * Excess clearing is only a performance cost, since per-tile contents - * will be loaded/stored in the loop below. - */ - if (vc4->cleared & (PIPE_CLEAR_COLOR0 | - PIPE_CLEAR_DEPTH | - PIPE_CLEAR_STENCIL)) { - cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); - cl_u8(&vc4->rcl, 0); - cl_u8(&vc4->rcl, 0); - - cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE); - cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */ - } - - uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0; - uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0; - uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc); - - for (int y = min_y_tile; y <= max_y_tile; y++) { - for (int x = min_x_tile; x <= max_x_tile; x++) { - bool end_of_frame = (x == max_x_tile && - y == max_y_tile); - bool coords_emitted = false; - - /* Note that the load doesn't actually occur until the - * tile coords packet is processed, and only one load - * may be outstanding at a time. - */ - if (resolve_uncleared & PIPE_CLEAR_COLOR) { - vc4_store_before_load(vc4, &coords_emitted); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_SET_FIELD(csurf->tiling, - VC4_LOADSTORE_TILE_BUFFER_TILING) | - VC4_SET_FIELD(vc4_rt_format_is_565(csurf->base.format) ? - VC4_LOADSTORE_TILE_BUFFER_BGR565 : - VC4_LOADSTORE_TILE_BUFFER_RGBA8888, - VC4_LOADSTORE_TILE_BUFFER_FORMAT)); - cl_reloc_hindex(&vc4->rcl, color_hindex, - csurf->offset); - - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - } - - if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - vc4_store_before_load(vc4, &coords_emitted); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_SET_FIELD(zsurf->tiling, - VC4_LOADSTORE_TILE_BUFFER_TILING)); - cl_reloc_hindex(&vc4->rcl, depth_hindex, - zsurf->offset); - - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - } - - /* Clipping depends on tile coordinates having been - * emitted, so make sure it's happened even if - * everything was cleared to start. - */ - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - - /* Wait for the binner before jumping to the first - * tile's lists. - */ - if (x == min_x_tile && y == min_y_tile) - cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST); - cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex, - (y * stride_in_tiles + x) * 32); - - if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - - cl_start_reloc(&vc4->rcl, 1); - cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); - cl_u16(&vc4->rcl, - VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, - VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_SET_FIELD(zsurf->tiling, - VC4_LOADSTORE_TILE_BUFFER_TILING) | - VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR); - cl_reloc_hindex(&vc4->rcl, depth_hindex, - zsurf->offset | - ((end_of_frame && - !(vc4->resolve & PIPE_CLEAR_COLOR0)) ? - VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); - - coords_emitted = false; - } - - if (vc4->resolve & PIPE_CLEAR_COLOR0) { - vc4_tile_coordinates(vc4, x, y, &coords_emitted); - if (end_of_frame) { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); - } else { - cl_u8(&vc4->rcl, - VC4_PACKET_STORE_MS_TILE_BUFFER); - } - - coords_emitted = false; - } - - /* One of the bits needs to have been set that would - * have triggered an EOF. - */ - assert(vc4->resolve & (PIPE_CLEAR_COLOR0 | - PIPE_CLEAR_DEPTH | - PIPE_CLEAR_STENCIL)); - /* Any coords emitted must also have been consumed by - * a store. - */ - assert(!coords_emitted); - } - } - - if (vc4->resolve & PIPE_CLEAR_COLOR0) - ctex->writes++; - - if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) - ztex->writes++; -} - void vc4_flush(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); + struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0]; + struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf; if (!vc4->needs_flush) return; @@ -324,7 +65,31 @@ vc4_flush(struct pipe_context *pctx) /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */ cl_u8(&vc4->bcl, VC4_PACKET_FLUSH); - vc4_setup_rcl(vc4); + if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) { + pipe_surface_reference(&vc4->color_write, cbuf); + if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) { + pipe_surface_reference(&vc4->color_read, cbuf); + } else { + pipe_surface_reference(&vc4->color_read, NULL); + } + + } else { + pipe_surface_reference(&vc4->color_write, NULL); + pipe_surface_reference(&vc4->color_read, NULL); + } + + if (vc4->framebuffer.zsbuf && + (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + pipe_surface_reference(&vc4->zs_write, zsbuf); + if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + pipe_surface_reference(&vc4->zs_read, zsbuf); + } else { + pipe_surface_reference(&vc4->zs_read, NULL); + } + } else { + pipe_surface_reference(&vc4->zs_write, NULL); + pipe_surface_reference(&vc4->zs_read, NULL); + } vc4_job_submit(vc4); } diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 41dacb9172d..ad5d0b153ff 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -178,12 +178,18 @@ struct vc4_context { struct vc4_screen *screen; struct vc4_cl bcl; - struct vc4_cl rcl; struct vc4_cl shader_rec; struct vc4_cl uniforms; struct vc4_cl bo_handles; struct vc4_cl bo_pointers; uint32_t shader_rec_count; + + /** @{ Surfaces to submit rendering for. */ + struct pipe_surface *color_read; + struct pipe_surface *color_write; + struct pipe_surface *zs_read; + struct pipe_surface *zs_write; + /** @} */ /** @{ * Bounding box of the scissor across all queued drawing. * @@ -194,6 +200,13 @@ struct vc4_context { uint32_t draw_max_x; uint32_t draw_max_y; /** @} */ + /** @{ + * Width/height of the color framebuffer being rendered to, + * for VC4_TILE_RENDERING_MODE_CONFIG. + */ + uint32_t draw_width; + uint32_t draw_height; + /** @} */ struct vc4_bo *tile_alloc; struct vc4_bo *tile_state; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 15743ea7671..3e181d0606a 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -129,6 +129,8 @@ vc4_start_draw(struct vc4_context *vc4) vc4->needs_flush = true; vc4->draw_call_queued = true; + vc4->draw_width = width; + vc4->draw_height = height; } static void diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h index 062fd3b687e..5f1ee4fa125 100644 --- a/src/gallium/drivers/vc4/vc4_drm.h +++ b/src/gallium/drivers/vc4/vc4_drm.h @@ -38,6 +38,15 @@ #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) +struct drm_vc4_submit_rcl_surface { + uint32_t hindex; /* Handle index, or ~0 if not present. */ + uint32_t offset; /* Offset to start of buffer. */ + /* + * Bits for either render config (color_ms_write) or load/store packet. + */ + uint16_t bits; + uint16_t pad; +}; /** * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D @@ -62,16 +71,6 @@ struct drm_vc4_submit_cl { */ uint64_t bin_cl; - /* Pointer to the render command list. - * - * The render command list contains a set of packets to load the - * current tile's state (reading from memory, or just clearing it) - * into the GPU, then call into the tile allocation BO to run the - * stored rendering for that tile, then store the tile's state back to - * memory. - */ - uint64_t render_cl; - /* Pointer to the shader records. * * Shader records are the structures read by the hardware that contain @@ -102,8 +101,6 @@ struct drm_vc4_submit_cl { /* Size in bytes of the binner command list. */ uint32_t bin_cl_size; - /* Size in bytes of the render command list */ - uint32_t render_cl_size; /* Size in bytes of the set of shader records. */ uint32_t shader_rec_size; /* Number of shader records. @@ -119,8 +116,25 @@ struct drm_vc4_submit_cl { /* Number of BO handles passed in (size is that times 4). */ uint32_t bo_handle_count; + /* RCL setup: */ + uint16_t width; + uint16_t height; + uint8_t min_x_tile; + uint8_t min_y_tile; + uint8_t max_x_tile; + uint8_t max_y_tile; + struct drm_vc4_submit_rcl_surface color_read; + struct drm_vc4_submit_rcl_surface color_ms_write; + struct drm_vc4_submit_rcl_surface zs_read; + struct drm_vc4_submit_rcl_surface zs_write; + uint32_t clear_color[2]; + uint32_t clear_z; + uint8_t clear_s; + + uint32_t pad:24; + +#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) uint32_t flags; - uint32_t pad; /* Returned value of the seqno of this render job (for the * wait ioctl). diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index 76037162102..dcade15443a 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -33,7 +33,6 @@ void vc4_job_init(struct vc4_context *vc4) { vc4_init_cl(vc4, &vc4->bcl); - vc4_init_cl(vc4, &vc4->rcl); vc4_init_cl(vc4, &vc4->shader_rec); vc4_init_cl(vc4, &vc4->uniforms); vc4_init_cl(vc4, &vc4->bo_handles); @@ -50,7 +49,6 @@ vc4_job_reset(struct vc4_context *vc4) vc4_bo_unreference(&referenced_bos[i]); } vc4_reset_cl(&vc4->bcl); - vc4_reset_cl(&vc4->rcl); vc4_reset_cl(&vc4->shader_rec); vc4_reset_cl(&vc4->uniforms); vc4_reset_cl(&vc4->bo_handles); @@ -75,6 +73,70 @@ vc4_job_reset(struct vc4_context *vc4) vc4->draw_max_y = 0; } +static void +vc4_submit_setup_rcl_surface(struct vc4_context *vc4, + struct drm_vc4_submit_rcl_surface *submit_surf, + struct pipe_surface *psurf, + bool is_depth, bool is_write) +{ + struct vc4_surface *surf = vc4_surface(psurf); + + if (!surf) { + submit_surf->hindex = ~0; + return; + } + + struct vc4_resource *rsc = vc4_resource(psurf->texture); + submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); + submit_surf->offset = surf->offset; + + if (is_depth) { + submit_surf->bits = + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, + VC4_LOADSTORE_TILE_BUFFER_BUFFER); + + } else { + submit_surf->bits = + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ? + VC4_LOADSTORE_TILE_BUFFER_BGR565 : + VC4_LOADSTORE_TILE_BUFFER_RGBA8888, + VC4_LOADSTORE_TILE_BUFFER_FORMAT); + } + submit_surf->bits |= + VC4_SET_FIELD(surf->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING); + + if (is_write) + rsc->writes++; +} + +static void +vc4_submit_setup_ms_rcl_surface(struct vc4_context *vc4, + struct drm_vc4_submit_rcl_surface *submit_surf, + struct pipe_surface *psurf) +{ + struct vc4_surface *surf = vc4_surface(psurf); + + if (!surf) { + submit_surf->hindex = ~0; + return; + } + + struct vc4_resource *rsc = vc4_resource(psurf->texture); + submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); + submit_surf->offset = surf->offset; + + submit_surf->bits = + VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ? + VC4_RENDER_CONFIG_FORMAT_BGR565 : + VC4_RENDER_CONFIG_FORMAT_RGBA8888, + VC4_RENDER_CONFIG_FORMAT) | + VC4_SET_FIELD(surf->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); + + rsc->writes++; +} + /** * Submits the job to the kernel and then reinitializes it. */ @@ -84,26 +146,49 @@ vc4_job_submit(struct vc4_context *vc4) if (vc4_debug & VC4_DEBUG_CL) { fprintf(stderr, "BCL:\n"); vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false); - fprintf(stderr, "RCL:\n"); - vc4_dump_cl(vc4->rcl.base, vc4->rcl.next - vc4->rcl.base, true); } struct drm_vc4_submit_cl submit; memset(&submit, 0, sizeof(submit)); + cl_ensure_space(&vc4->bo_handles, 4 * sizeof(uint32_t)); + cl_ensure_space(&vc4->bo_pointers, 4 * sizeof(struct vc4_bo *)); + + vc4_submit_setup_rcl_surface(vc4, &submit.color_read, + vc4->color_read, false, false); + vc4_submit_setup_ms_rcl_surface(vc4, &submit.color_ms_write, + vc4->color_write); + vc4_submit_setup_rcl_surface(vc4, &submit.zs_read, + vc4->zs_read, true, false); + vc4_submit_setup_rcl_surface(vc4, &submit.zs_write, + vc4->zs_write, true, true); + submit.bo_handles = (uintptr_t)vc4->bo_handles.base; submit.bo_handle_count = (vc4->bo_handles.next - vc4->bo_handles.base) / 4; submit.bin_cl = (uintptr_t)vc4->bcl.base; submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base; - submit.render_cl = (uintptr_t)vc4->rcl.base; - submit.render_cl_size = vc4->rcl.next - vc4->rcl.base; submit.shader_rec = (uintptr_t)vc4->shader_rec.base; submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base; submit.shader_rec_count = vc4->shader_rec_count; submit.uniforms = (uintptr_t)vc4->uniforms.base; submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base; + assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0); + submit.min_x_tile = vc4->draw_min_x / 64; + submit.min_y_tile = vc4->draw_min_y / 64; + submit.max_x_tile = (vc4->draw_max_x - 1) / 64; + submit.max_y_tile = (vc4->draw_max_y - 1) / 64; + submit.width = vc4->draw_width; + submit.height = vc4->draw_height; + if (vc4->cleared) { + submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; + submit.clear_color[0] = vc4->clear_color[0]; + submit.clear_color[1] = vc4->clear_color[1]; + submit.clear_z = vc4->clear_depth; + submit.clear_s = vc4->clear_stencil; + } + if (!(vc4_debug & VC4_DEBUG_NORAST)) { int ret; -- 2.11.0