From 3e06b918aab3c6a4ca30f5e935aa6996f7009d56 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 7 Aug 2018 17:53:24 -0700
Subject: [PATCH] vc4: Compile the LT image helper per cpp we might load/store.

For the partial load/store support I'm about to add, we want the memcpy
to be compiled out to a single load/store. This should also eliminate
the calls to vc4_utile_width/height().

Improves x11perf -putimage100 performance by 3.76344% +/- 1.16978% (n=15)
---
 src/gallium/drivers/vc4/vc4_tiling_lt.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_tiling_lt.c b/src/gallium/drivers/vc4/vc4_tiling_lt.c
index b8f4c0405c2..8c875e7bd3a 100644
--- a/src/gallium/drivers/vc4/vc4_tiling_lt.c
+++ b/src/gallium/drivers/vc4/vc4_tiling_lt.c
@@ -289,12 +289,40 @@ vc4_lt_image_helper(void *gpu, uint32_t gpu_stride,
         }
 }
 
+static inline void
+vc4_lt_image_cpp_helper(void *gpu, uint32_t gpu_stride,
+                        void *cpu, uint32_t cpu_stride,
+                        int cpp, const struct pipe_box *box, bool to_cpu)
+{
+        switch (cpp) {
+        case 1:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 1, box,
+                                    to_cpu);
+                break;
+        case 2:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 2, box,
+                                    to_cpu);
+                break;
+        case 4:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 4, box,
+                                    to_cpu);
+                break;
+        case 8:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 8, box,
+                                    to_cpu);
+                break;
+        default:
+                unreachable("bad cpp");
+        }
+}
+
 void
 NEON_TAG(vc4_load_lt_image)(void *dst, uint32_t dst_stride,
                             void *src, uint32_t src_stride,
                             int cpp, const struct pipe_box *box)
 {
-        vc4_lt_image_helper(src, src_stride, dst, dst_stride, cpp, box, true);
+        vc4_lt_image_cpp_helper(src, src_stride, dst, dst_stride, cpp, box,
+                                true);
 }
 
 void
@@ -302,5 +330,6 @@ NEON_TAG(vc4_store_lt_image)(void *dst, uint32_t dst_stride,
                              void *src, uint32_t src_stride,
                              int cpp, const struct pipe_box *box)
 {
-        vc4_lt_image_helper(dst, dst_stride, src, src_stride, cpp, box, false);
+        vc4_lt_image_cpp_helper(dst, dst_stride, src, src_stride, cpp, box,
+                                false);
 }
-- 
2.11.0