From c79a8e840c435bc26a251e34b043318e8b2081db Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 20 Jun 2022 18:48:37 -0700 Subject: [PATCH] util/cacheflush: Optimize flushing when ppc host has coherent icache On linux, the AT_HWCAP bit PPC_FEATURE_ICACHE_SNOOP indicates that we can use a simplified 3 instruction flush sequence. Signed-off-by: Nicholas Piggin Message-Id: <20220519141131.29839-1-npiggin@gmail.com> [rth: update after merging cacheflush.c and cacheinfo.c] Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-Id: <20220621014837.189139-4-richard.henderson@linaro.org> --- util/cacheflush.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/util/cacheflush.c b/util/cacheflush.c index 01b6cb7583..2c2c73e085 100644 --- a/util/cacheflush.c +++ b/util/cacheflush.c @@ -117,6 +117,10 @@ static void sys_cache_info(int *isize, int *dsize) * Architecture (+ OS) specific cache detection mechanisms. */ +#if defined(__powerpc__) +static bool have_coherent_icache; +#endif + #if defined(__aarch64__) && !defined(CONFIG_DARWIN) /* Apple does not expose CTR_EL0, so we must use system interfaces. */ static uint64_t save_ctr_el0; @@ -156,6 +160,7 @@ static void arch_cache_info(int *isize, int *dsize) if (*dsize == 0) { *dsize = qemu_getauxval(AT_DCACHEBSIZE); } + have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP; } #else @@ -298,8 +303,24 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) { uintptr_t p, b, e; - size_t dsize = qemu_dcache_linesize; - size_t isize = qemu_icache_linesize; + size_t dsize, isize; + + /* + * Some processors have coherent caches and support a simplified + * flushing procedure. See + * POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi) + * https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k + */ + if (have_coherent_icache) { + asm volatile ("sync\n\t" + "icbi 0,%0\n\t" + "isync" + : : "r"(rx) : "memory"); + return; + } + + dsize = qemu_dcache_linesize; + isize = qemu_icache_linesize; b = rw & ~(dsize - 1); e = (rw + len + dsize - 1) & ~(dsize - 1); -- 2.11.0