From 592134617c98f37b8b39c6dd684e5a1832c071d2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 30 Oct 2022 12:07:32 +1100 Subject: [PATCH] accel/tcg: Reorg system mode store helpers Instead of trying to unify all operations on uint64_t, use mmu_lookup() to perform the basic tlb hit and resolution. Create individual functions to handle access by size. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 394 +++++++++++++++++++++++++---------------------------- 1 file changed, 186 insertions(+), 208 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index a85edd8246..617777055a 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2532,322 +2532,300 @@ store_memop(void *haddr, uint64_t val, MemOp op) } } -static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr); - -static void __attribute__((noinline)) -store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, - uintptr_t retaddr, size_t size, uintptr_t mmu_idx, - bool big_endian) -{ - uintptr_t index, index2; - CPUTLBEntry *entry, *entry2; - target_ulong page1, page2, tlb_addr, tlb_addr2; - MemOpIdx oi; - size_t size2; - int i; +/** + * do_st_mmio_leN: + * @env: cpu context + * @p: translation parameters + * @val_le: data to store + * @mmu_idx: virtual address context + * @ra: return address into tcg generated code, or 0 + * + * Store @p->size bytes at @p->addr, which is memory-mapped i/o. + * The bytes to store are extracted in little-endian order from @val_le; + * return the bytes of @val_le beyond @p->size that have not been stored. + */ +static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p, + uint64_t val_le, int mmu_idx, uintptr_t ra) +{ + CPUTLBEntryFull *full = p->full; + target_ulong addr = p->addr; + int i, size = p->size; - /* - * Ensure the second page is in the TLB. Note that the first page - * is already guaranteed to be filled, and that the second page - * cannot evict the first. An exception to this rule is PAGE_WRITE_INV - * handling: the first page could have evicted itself. - */ - page1 = addr & TARGET_PAGE_MASK; - page2 = (addr + size) & TARGET_PAGE_MASK; - size2 = (addr + size) & ~TARGET_PAGE_MASK; - index2 = tlb_index(env, mmu_idx, page2); - entry2 = tlb_entry(env, mmu_idx, page2); - - tlb_addr2 = tlb_addr_write(entry2); - if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) { - if (!victim_tlb_hit(env, mmu_idx, index2, MMU_DATA_STORE, page2)) { - tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE, - mmu_idx, retaddr); - index2 = tlb_index(env, mmu_idx, page2); - entry2 = tlb_entry(env, mmu_idx, page2); - } - tlb_addr2 = tlb_addr_write(entry2); + QEMU_IOTHREAD_LOCK_GUARD(); + for (i = 0; i < size; i++, val_le >>= 8) { + io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB); } + return val_le; +} - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = tlb_addr_write(entry); +/** + * do_st_bytes_leN: + * @p: translation parameters + * @val_le: data to store + * + * Store @p->size bytes at @p->haddr, which is RAM. + * The bytes to store are extracted in little-endian order from @val_le; + * return the bytes of @val_le beyond @p->size that have not been stored. + */ +static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le) +{ + uint8_t *haddr = p->haddr; + int i, size = p->size; - /* - * Handle watchpoints. Since this may trap, all checks - * must happen before any store. - */ - if (unlikely(tlb_addr & TLB_WATCHPOINT)) { - cpu_check_watchpoint(env_cpu(env), addr, size - size2, - env_tlb(env)->d[mmu_idx].fulltlb[index].attrs, - BP_MEM_WRITE, retaddr); - } - if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) { - cpu_check_watchpoint(env_cpu(env), page2, size2, - env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs, - BP_MEM_WRITE, retaddr); + for (i = 0; i < size; i++, val_le >>= 8) { + haddr[i] = val_le; } + return val_le; +} - /* - * XXX: not efficient, but simple. - * This loop must go in the forward direction to avoid issues - * with self-modifying code in Windows 64-bit. - */ - oi = make_memop_idx(MO_UB, mmu_idx); - if (big_endian) { - for (i = 0; i < size; ++i) { - /* Big-endian extract. */ - uint8_t val8 = val >> (((size - 1) * 8) - (i * 8)); - full_stb_mmu(env, addr + i, val8, oi, retaddr); - } +/* + * Wrapper for the above. + */ +static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p, + uint64_t val_le, int mmu_idx, uintptr_t ra) +{ + if (unlikely(p->flags & TLB_MMIO)) { + return do_st_mmio_leN(env, p, val_le, mmu_idx, ra); + } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { + return val_le >> (p->size * 8); } else { - for (i = 0; i < size; ++i) { - /* Little-endian extract. */ - uint8_t val8 = val >> (i * 8); - full_stb_mmu(env, addr + i, val8, oi, retaddr); - } + return do_st_bytes_leN(p, val_le); } } -static inline void QEMU_ALWAYS_INLINE -store_helper(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr, MemOp op) +static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val, + int mmu_idx, uintptr_t ra) { - const unsigned a_bits = get_alignment_bits(get_memop(oi)); - const size_t size = memop_size(op); - uintptr_t mmu_idx = get_mmuidx(oi); - uintptr_t index; - CPUTLBEntry *entry; - target_ulong tlb_addr; - void *haddr; - - tcg_debug_assert(mmu_idx < NB_MMU_MODES); - - /* Handle CPU specific unaligned behaviour */ - if (addr & ((1 << a_bits) - 1)) { - cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, - mmu_idx, retaddr); + if (unlikely(p->flags & TLB_MMIO)) { + io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB); + } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { + /* nothing */ + } else { + *(uint8_t *)p->haddr = val; } +} - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); - tlb_addr = tlb_addr_write(entry); - - /* If the TLB entry is for a different page, reload and try again. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE, - addr & TARGET_PAGE_MASK)) { - tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - entry = tlb_entry(env, mmu_idx, addr); +static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val, + int mmu_idx, MemOp memop, uintptr_t ra) +{ + if (unlikely(p->flags & TLB_MMIO)) { + io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { + /* nothing */ + } else { + /* Swap to host endian if necessary, then store. */ + if (memop & MO_BSWAP) { + val = bswap16(val); } - tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; + store_memop(p->haddr, val, MO_UW); } +} - /* Handle anything that isn't just a straight memory access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUTLBEntryFull *full; - bool need_swap; - - /* For anything that is unaligned, recurse through byte stores. */ - if ((addr & (size - 1)) != 0) { - goto do_unaligned_access; - } - - full = &env_tlb(env)->d[mmu_idx].fulltlb[index]; - - /* Handle watchpoints. */ - if (unlikely(tlb_addr & TLB_WATCHPOINT)) { - /* On watchpoint hit, this will longjmp out. */ - cpu_check_watchpoint(env_cpu(env), addr, size, - full->attrs, BP_MEM_WRITE, retaddr); - } - - need_swap = size > 1 && (tlb_addr & TLB_BSWAP); - - /* Handle I/O access. */ - if (tlb_addr & TLB_MMIO) { - io_writex(env, full, mmu_idx, val, addr, retaddr, - op ^ (need_swap * MO_BSWAP)); - return; - } - - /* Ignore writes to ROM. */ - if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) { - return; - } - - /* Handle clean RAM pages. */ - if (tlb_addr & TLB_NOTDIRTY) { - notdirty_write(env_cpu(env), addr, size, full, retaddr); - } - - haddr = (void *)((uintptr_t)addr + entry->addend); - - /* - * Keep these two store_memop separate to ensure that the compiler - * is able to fold the entire function to a single instruction. - * There is a build-time assert inside to remind you of this. ;-) - */ - if (unlikely(need_swap)) { - store_memop(haddr, val, op ^ MO_BSWAP); - } else { - store_memop(haddr, val, op); +static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val, + int mmu_idx, MemOp memop, uintptr_t ra) +{ + if (unlikely(p->flags & TLB_MMIO)) { + io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { + /* nothing */ + } else { + /* Swap to host endian if necessary, then store. */ + if (memop & MO_BSWAP) { + val = bswap32(val); } - return; + store_memop(p->haddr, val, MO_UL); } - - /* Handle slow unaligned access (it spans two pages or IO). */ - if (size > 1 - && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 - >= TARGET_PAGE_SIZE)) { - do_unaligned_access: - store_helper_unaligned(env, addr, val, retaddr, size, - mmu_idx, memop_big_endian(op)); - return; - } - - haddr = (void *)((uintptr_t)addr + entry->addend); - store_memop(haddr, val, op); } -static void __attribute__((noinline)) -full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr) +static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val, + int mmu_idx, MemOp memop, uintptr_t ra) { - validate_memop(oi, MO_UB); - store_helper(env, addr, val, oi, retaddr, MO_UB); + if (unlikely(p->flags & TLB_MMIO)) { + io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop); + } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) { + /* nothing */ + } else { + /* Swap to host endian if necessary, then store. */ + if (memop & MO_BSWAP) { + val = bswap64(val); + } + store_memop(p->haddr, val, MO_UQ); + } } void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - MemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t ra) { - full_stb_mmu(env, addr, val, oi, retaddr); + MMULookupLocals l; + bool crosspage; + + validate_memop(oi, MO_UB); + crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); + tcg_debug_assert(!crosspage); + + do_st_1(env, &l.page[0], val, l.mmu_idx, ra); } -static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr) +static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t ra) { - validate_memop(oi, MO_LEUW); - store_helper(env, addr, val, oi, retaddr, MO_LEUW); + MMULookupLocals l; + bool crosspage; + uint8_t a, b; + + crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); + if (likely(!crosspage)) { + do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra); + return; + } + + if ((l.memop & MO_BSWAP) == MO_LE) { + a = val, b = val >> 8; + } else { + b = val, a = val >> 8; + } + do_st_1(env, &l.page[0], a, l.mmu_idx, ra); + do_st_1(env, &l.page[1], b, l.mmu_idx, ra); } void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - full_le_stw_mmu(env, addr, val, oi, retaddr); -} - -static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr) -{ - validate_memop(oi, MO_BEUW); - store_helper(env, addr, val, oi, retaddr, MO_BEUW); + validate_memop(oi, MO_LEUW); + do_st2_mmu(env, addr, val, oi, retaddr); } void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - full_be_stw_mmu(env, addr, val, oi, retaddr); + validate_memop(oi, MO_BEUW); + do_st2_mmu(env, addr, val, oi, retaddr); } -static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr) +static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t ra) { - validate_memop(oi, MO_LEUL); - store_helper(env, addr, val, oi, retaddr, MO_LEUL); + MMULookupLocals l; + bool crosspage; + + crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); + if (likely(!crosspage)) { + do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra); + return; + } + + /* Swap to little endian for simplicity, then store by bytes. */ + if ((l.memop & MO_BSWAP) != MO_LE) { + val = bswap32(val); + } + val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra); + (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra); } void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - full_le_stl_mmu(env, addr, val, oi, retaddr); + validate_memop(oi, MO_LEUL); + do_st4_mmu(env, addr, val, oi, retaddr); } -static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - MemOpIdx oi, uintptr_t retaddr) +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_BEUL); - store_helper(env, addr, val, oi, retaddr, MO_BEUL); + do_st4_mmu(env, addr, val, oi, retaddr); } -void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - MemOpIdx oi, uintptr_t retaddr) +static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t ra) { - full_be_stl_mmu(env, addr, val, oi, retaddr); + MMULookupLocals l; + bool crosspage; + + crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l); + if (likely(!crosspage)) { + do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra); + return; + } + + /* Swap to little endian for simplicity, then store by bytes. */ + if ((l.memop & MO_BSWAP) != MO_LE) { + val = bswap64(val); + } + val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra); + (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra); } void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_LEUQ); - store_helper(env, addr, val, oi, retaddr, MO_LEUQ); + do_st8_mmu(env, addr, val, oi, retaddr); } void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr) { validate_memop(oi, MO_BEUQ); - store_helper(env, addr, val, oi, retaddr, MO_BEUQ); + do_st8_mmu(env, addr, val, oi, retaddr); } /* * Store Helpers for cpu_ldst.h */ -typedef void FullStoreHelper(CPUArchState *env, target_ulong addr, - uint64_t val, MemOpIdx oi, uintptr_t retaddr); - -static inline void cpu_store_helper(CPUArchState *env, target_ulong addr, - uint64_t val, MemOpIdx oi, uintptr_t ra, - FullStoreHelper *full_store) +static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi) { - full_store(env, addr, val, oi, ra); qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu); + helper_ret_stb_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu); + helper_be_stw_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu); + helper_be_stl_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu); + helper_be_stq_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu); + helper_le_stw_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu); + helper_le_stl_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val, MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu); + helper_le_stq_mmu(env, addr, val, oi, retaddr); + plugin_store_cb(env, addr, oi); } void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val, -- 2.11.0