From c74cc082a6d3f8fde7778d26f600967582741967 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 6 Jun 2023 10:19:35 +0100 Subject: [PATCH] target/arm: Use tcg_gen_qemu_ld_i128 for LDXP While we don't require 16-byte atomicity here, using a single larger load simplifies the code, and makes it a closer match to STXP. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20230530191438.411344-5-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/tcg/translate-a64.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 1d34c5a703..1fff74c73a 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -2386,14 +2386,14 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i64 addr, int size, bool is_pair) { int idx = get_mem_index(s); - MemOp memop = s->be_data; + MemOp memop; g_assert(size <= 3); if (is_pair) { g_assert(size >= 2); if (size == 2) { /* The pair must be single-copy atomic for the doubleword. */ - memop |= MO_64 | MO_ALIGN; + memop = finalize_memop(s, MO_64 | MO_ALIGN); tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); if (s->be_data == MO_LE) { tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); @@ -2403,21 +2403,30 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); } } else { - /* The pair must be single-copy atomic for *each* doubleword, not - the entire quadword, however it must be quadword aligned. */ - memop |= MO_64; - tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, - memop | MO_ALIGN_16); + /* + * The pair must be single-copy atomic for *each* doubleword, not + * the entire quadword, however it must be quadword aligned. + * Expose the complete load to tcg, for ease of tlb lookup, + * but indicate that only 8-byte atomicity is required. + */ + TCGv_i128 t16 = tcg_temp_new_i128(); - TCGv_i64 addr2 = tcg_temp_new_i64(); - tcg_gen_addi_i64(addr2, addr, 8); - tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop); + memop = finalize_memop_atom(s, MO_128 | MO_ALIGN_16, + MO_ATOM_IFALIGN_PAIR); + tcg_gen_qemu_ld_i128(t16, addr, idx, memop); + if (s->be_data == MO_LE) { + tcg_gen_extr_i128_i64(cpu_exclusive_val, + cpu_exclusive_high, t16); + } else { + tcg_gen_extr_i128_i64(cpu_exclusive_high, + cpu_exclusive_val, t16); + } tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); } } else { - memop |= size | MO_ALIGN; + memop = finalize_memop(s, size | MO_ALIGN); tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); } -- 2.11.0