From c2b33d0be998bf539953f1dad0aa0d1cc8d9d069 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Mon, 6 Mar 2023 18:58:28 -0800 Subject: [PATCH] Hexagon (target/hexagon) Improve code gen for predicated HVX instructions The following improvements are made for predicated HVX instructions During gen_commit_hvx, unconditionally move the "new" value into the dest Don't set slot_cancelled Remove runtime bookkeeping of which registers were updated Reduce the cases where gen_log_vreg_write[_pair] is called It's only needed for special operands VxxV and VyV Remove gen_log_qreg_write Signed-off-by: Taylor Simpson Reviewed-by: Anton Johansson Message-Id: <20230307025828.1612809-15-tsimpson@quicinc.com> --- target/hexagon/README | 21 ++++---------- target/hexagon/cpu.h | 5 +--- target/hexagon/gen_analyze_funcs.py | 3 +- target/hexagon/gen_tcg_funcs.py | 35 +++++----------------- target/hexagon/gen_tcg_hvx.h | 17 +---------- target/hexagon/genptr.c | 48 ++++-------------------------- target/hexagon/translate.c | 58 ++++--------------------------------- target/hexagon/translate.h | 15 ++++------ 8 files changed, 34 insertions(+), 168 deletions(-) diff --git a/target/hexagon/README b/target/hexagon/README index 365606f1e6..ebafc78b1c 100644 --- a/target/hexagon/README +++ b/target/hexagon/README @@ -136,12 +136,9 @@ For HVX vectors, the generator behaves slightly differently. The wide vectors won't fit in a TCGv or TCGv_i64, so we pass TCGv_ptr variables to pass the address to helper functions. Here's an example for an HVX vector-add-word istruction. - static void generate_V6_vaddw( - CPUHexagonState *env, - DisasContext *ctx, - Insn *insn, - Packet *pkt) + static void generate_V6_vaddw(DisasContext *ctx) { + Insn *insn __attribute__((unused)) = ctx->insn; const int VdN = insn->regno[0]; const intptr_t VdV_off = ctx_future_vreg_off(ctx, VdN, 1, true); @@ -157,9 +154,7 @@ istruction. TCGv_ptr VvV = tcg_temp_new_ptr(); tcg_gen_addi_ptr(VuV, cpu_env, VuV_off); tcg_gen_addi_ptr(VvV, cpu_env, VvV_off); - TCGv slot = tcg_constant_tl(insn->slot); - gen_helper_V6_vaddw(cpu_env, VdV, VuV, VvV, slot); - gen_log_vreg_write(ctx, VdV_off, VdN, EXT_DFL, insn->slot, false); + gen_helper_V6_vaddw(cpu_env, VdV, VuV, VvV); } Notice that we also generate a variable named _off for each operand of @@ -172,12 +167,9 @@ functions from tcg-op-gvec.h. Here's the override for this instruction. Finally, we notice that the override doesn't use the TCGv_ptr variables, so we don't generate them when an override is present. Here is what we generate when the override is present. - static void generate_V6_vaddw( - CPUHexagonState *env, - DisasContext *ctx, - Insn *insn, - Packet *pkt) + static void generate_V6_vaddw(DisasContext *ctx) { + Insn *insn __attribute__((unused)) = ctx->insn; const int VdN = insn->regno[0]; const intptr_t VdV_off = ctx_future_vreg_off(ctx, VdN, 1, true); @@ -188,7 +180,6 @@ when the override is present. const intptr_t VvV_off = vreg_src_off(ctx, VvN); fGEN_TCG_V6_vaddw({ fHIDE(int i;) fVFOREACH(32, i) { VdV.w[i] = VuV.w[i] + VvV.w[i] ; } }); - gen_log_vreg_write(ctx, VdV_off, VdN, EXT_DFL, insn->slot, false); } We also generate an analyze_ function for each instruction. Currently, @@ -281,10 +272,8 @@ For Hexagon Vector eXtensions (HVX), the following fields are used VRegs Vector registers future_VRegs Registers to be stored during packet commit tmp_VRegs Temporary registers *not* stored during commit - VRegs_updated Mask of predicated vector writes QRegs Q (vector predicate) registers future_QRegs Registers to be stored during packet commit - QRegs_updated Mask of predicated vector writes *** Debugging *** diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 34c0ae0a67..81b663ecfb 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -111,11 +111,8 @@ typedef struct CPUArchState { MMVector future_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16); MMVector tmp_VRegs[VECTOR_TEMPS_MAX] QEMU_ALIGNED(16); - VRegMask VRegs_updated; - MMQReg QRegs[NUM_QREGS] QEMU_ALIGNED(16); MMQReg future_QRegs[NUM_QREGS] QEMU_ALIGNED(16); - QRegMask QRegs_updated; /* Temporaries used within instructions */ MMVectorPair VuuV QEMU_ALIGNED(16); diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py index 0bb4fcb476..ebd3e7afb9 100755 --- a/target/hexagon/gen_analyze_funcs.py +++ b/target/hexagon/gen_analyze_funcs.py @@ -110,8 +110,7 @@ def analyze_opn_old(f, tag, regtype, regid, regno): if (regid in {"d", "e", "x"}): f.write(" const int %s = insn->regno[%d];\n" % \ (regN, regno)) - f.write(" ctx_log_qreg_write(ctx, %s, %s);\n" % \ - (regN, predicated)) + f.write(" ctx_log_qreg_write(ctx, %s);\n" % (regN)) elif (regid in {"s", "t", "u", "v"}): f.write("// const int %s = insn->regno[%d];\n" % \ (regN, regno)) diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index 9336528e50..fa93e185ce 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -176,8 +176,7 @@ def genptr_decl(f, tag, regtype, regid, regno): (regtype, regid, regno)) f.write(" const intptr_t %s%sV_off =\n" % \ (regtype, regid)) - f.write(" offsetof(CPUHexagonState,\n") - f.write(" future_QRegs[%s%sN]);\n" % \ + f.write(" get_result_qreg(ctx, %s%sN);\n" % \ (regtype, regid)) if (not hex_common.skip_qemu_helper(tag)): f.write(" TCGv_ptr %s%sV = tcg_temp_new_ptr();\n" % \ @@ -406,36 +405,18 @@ def genptr_dst_write(f, tag, regtype, regid): def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"): if (regtype == "V"): - if (regid in {"dd", "xx", "yy"}): - if ('A_CONDEXEC' in hex_common.attribdict[tag]): - is_predicated = "true" - else: - is_predicated = "false" + if (regid in {"xx"}): f.write(" gen_log_vreg_write_pair(ctx, %s%sV_off, %s%sN, " % \ (regtype, regid, regtype, regid)) - f.write("%s, insn->slot, %s);\n" % \ - (newv, is_predicated)) - elif (regid in {"d", "x", "y"}): - if ('A_CONDEXEC' in hex_common.attribdict[tag]): - is_predicated = "true" - else: - is_predicated = "false" - f.write(" gen_log_vreg_write(ctx, %s%sV_off, %s%sN, %s, " % \ + f.write("%s);\n" % \ + (newv)) + elif (regid in {"y"}): + f.write(" gen_log_vreg_write(ctx, %s%sV_off, %s%sN, %s);\n" % \ (regtype, regid, regtype, regid, newv)) - f.write("insn->slot, %s);\n" % \ - (is_predicated)) - else: + elif (regid not in {"dd", "d", "x"}): print("Bad register parse: ", regtype, regid) elif (regtype == "Q"): - if (regid in {"d", "e", "x"}): - if ('A_CONDEXEC' in hex_common.attribdict[tag]): - is_predicated = "true" - else: - is_predicated = "false" - f.write(" gen_log_qreg_write(%s%sV_off, %s%sN, %s, " % \ - (regtype, regid, regtype, regid, newv)) - f.write("insn->slot, %s);\n" % (is_predicated)) - else: + if (regid not in {"d", "e", "x"}): print("Bad register parse: ", regtype, regid) else: print("Bad register parse: ", regtype, regid) diff --git a/target/hexagon/gen_tcg_hvx.h b/target/hexagon/gen_tcg_hvx.h index 94f272e286..d4aefe8e3f 100644 --- a/target/hexagon/gen_tcg_hvx.h +++ b/target/hexagon/gen_tcg_hvx.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -133,16 +133,11 @@ static inline void assert_vhist_tmp(DisasContext *ctx) do { \ TCGv lsb = tcg_temp_new(); \ TCGLabel *false_label = gen_new_label(); \ - TCGLabel *end_label = gen_new_label(); \ tcg_gen_andi_tl(lsb, PsV, 1); \ tcg_gen_brcondi_tl(TCG_COND_NE, lsb, PRED, false_label); \ tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \ sizeof(MMVector), sizeof(MMVector)); \ - tcg_gen_br(end_label); \ gen_set_label(false_label); \ - tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \ - 1 << insn->slot); \ - gen_set_label(end_label); \ } while (0) @@ -547,17 +542,12 @@ static inline void assert_vhist_tmp(DisasContext *ctx) do { \ TCGv LSB = tcg_temp_new(); \ TCGLabel *false_label = gen_new_label(); \ - TCGLabel *end_label = gen_new_label(); \ GET_EA; \ PRED; \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \ gen_vreg_load(ctx, DSTOFF, EA, true); \ INC; \ - tcg_gen_br(end_label); \ gen_set_label(false_label); \ - tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \ - 1 << insn->slot); \ - gen_set_label(end_label); \ } while (0) #define fGEN_TCG_PRED_VEC_LOAD_pred_pi \ @@ -717,17 +707,12 @@ static inline void assert_vhist_tmp(DisasContext *ctx) do { \ TCGv LSB = tcg_temp_new(); \ TCGLabel *false_label = gen_new_label(); \ - TCGLabel *end_label = gen_new_label(); \ GET_EA; \ PRED; \ tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, false_label); \ gen_vreg_store(ctx, EA, SRCOFF, insn->slot, ALIGN); \ INC; \ - tcg_gen_br(end_label); \ gen_set_label(false_label); \ - tcg_gen_ori_tl(hex_slot_cancelled, hex_slot_cancelled, \ - 1 << insn->slot); \ - gen_set_label(end_label); \ } while (0) #define fGEN_TCG_PRED_VEC_STORE_pred_pi(ALIGN) \ diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index da5dbc83cf..bb274d4a71 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -985,68 +985,32 @@ static intptr_t vreg_src_off(DisasContext *ctx, int num) } static void gen_log_vreg_write(DisasContext *ctx, intptr_t srcoff, int num, - VRegWriteType type, int slot_num, - bool is_predicated) + VRegWriteType type) { - TCGLabel *label_end = NULL; intptr_t dstoff; - if (is_predicated) { - TCGv cancelled = tcg_temp_new(); - label_end = gen_new_label(); - - /* Don't do anything if the slot was cancelled */ - tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1); - tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end); - } - if (type != EXT_TMP) { dstoff = ctx_future_vreg_off(ctx, num, 1, true); tcg_gen_gvec_mov(MO_64, dstoff, srcoff, sizeof(MMVector), sizeof(MMVector)); - tcg_gen_ori_tl(hex_VRegs_updated, hex_VRegs_updated, 1 << num); } else { dstoff = ctx_tmp_vreg_off(ctx, num, 1, false); tcg_gen_gvec_mov(MO_64, dstoff, srcoff, sizeof(MMVector), sizeof(MMVector)); } - - if (is_predicated) { - gen_set_label(label_end); - } } static void gen_log_vreg_write_pair(DisasContext *ctx, intptr_t srcoff, int num, - VRegWriteType type, int slot_num, - bool is_predicated) + VRegWriteType type) { - gen_log_vreg_write(ctx, srcoff, num ^ 0, type, slot_num, is_predicated); + gen_log_vreg_write(ctx, srcoff, num ^ 0, type); srcoff += sizeof(MMVector); - gen_log_vreg_write(ctx, srcoff, num ^ 1, type, slot_num, is_predicated); + gen_log_vreg_write(ctx, srcoff, num ^ 1, type); } -static void gen_log_qreg_write(intptr_t srcoff, int num, int vnew, - int slot_num, bool is_predicated) +static intptr_t get_result_qreg(DisasContext *ctx, int qnum) { - TCGLabel *label_end = NULL; - intptr_t dstoff; - - if (is_predicated) { - TCGv cancelled = tcg_temp_new(); - label_end = gen_new_label(); - - /* Don't do anything if the slot was cancelled */ - tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1); - tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end); - } - - dstoff = offsetof(CPUHexagonState, future_QRegs[num]); - tcg_gen_gvec_mov(MO_64, dstoff, srcoff, sizeof(MMQReg), sizeof(MMQReg)); - - if (is_predicated) { - tcg_gen_ori_tl(hex_QRegs_updated, hex_QRegs_updated, 1 << num); - gen_set_label(label_end); - } + return offsetof(CPUHexagonState, future_QRegs[qnum]); } static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src, diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 1e3a76aece..665476ab48 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -56,8 +56,6 @@ TCGv hex_dczero_addr; TCGv hex_llsc_addr; TCGv hex_llsc_val; TCGv_i64 hex_llsc_val_i64; -TCGv hex_VRegs_updated; -TCGv hex_QRegs_updated; TCGv hex_vstore_addr[VSTORES_MAX]; TCGv hex_vstore_size[VSTORES_MAX]; TCGv hex_vstore_pending[VSTORES_MAX]; @@ -248,12 +246,11 @@ static bool check_for_attrib(Packet *pkt, int attrib) static bool need_slot_cancelled(Packet *pkt) { - /* We only need slot_cancelled for conditional store and HVX instructions */ + /* We only need slot_cancelled for conditional store instructions */ for (int i = 0; i < pkt->num_insns; i++) { uint16_t opcode = pkt->insn[i].opcode; if (GET_ATTRIB(opcode, A_CONDEXEC) && - (GET_ATTRIB(opcode, A_STORE) || - GET_ATTRIB(opcode, A_CVI))) { + GET_ATTRIB(opcode, A_SCALAR_STORE)) { return true; } } @@ -453,11 +450,6 @@ static void gen_start_packet(DisasContext *ctx) i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1); } } - - if (pkt->pkt_has_hvx) { - tcg_gen_movi_tl(hex_VRegs_updated, 0); - tcg_gen_movi_tl(hex_QRegs_updated, 0); - } } bool is_gather_store_insn(DisasContext *ctx) @@ -725,65 +717,31 @@ static void gen_commit_hvx(DisasContext *ctx) /* * for (i = 0; i < ctx->vreg_log_idx; i++) { * int rnum = ctx->vreg_log[i]; - * if (ctx->vreg_is_predicated[i]) { - * if (env->VRegs_updated & (1 << rnum)) { - * env->VRegs[rnum] = env->future_VRegs[rnum]; - * } - * } else { - * env->VRegs[rnum] = env->future_VRegs[rnum]; - * } + * env->VRegs[rnum] = env->future_VRegs[rnum]; * } */ for (i = 0; i < ctx->vreg_log_idx; i++) { int rnum = ctx->vreg_log[i]; - bool is_predicated = ctx->vreg_is_predicated[i]; intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]); intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false); size_t size = sizeof(MMVector); - if (is_predicated) { - TCGv cmp = tcg_temp_new(); - TCGLabel *label_skip = gen_new_label(); - - tcg_gen_andi_tl(cmp, hex_VRegs_updated, 1 << rnum); - tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip); - tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); - gen_set_label(label_skip); - } else { - tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); - } + tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); } /* * for (i = 0; i < ctx->qreg_log_idx; i++) { * int rnum = ctx->qreg_log[i]; - * if (ctx->qreg_is_predicated[i]) { - * if (env->QRegs_updated) & (1 << rnum)) { - * env->QRegs[rnum] = env->future_QRegs[rnum]; - * } - * } else { - * env->QRegs[rnum] = env->future_QRegs[rnum]; - * } + * env->QRegs[rnum] = env->future_QRegs[rnum]; * } */ for (i = 0; i < ctx->qreg_log_idx; i++) { int rnum = ctx->qreg_log[i]; - bool is_predicated = ctx->qreg_is_predicated[i]; intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]); intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]); size_t size = sizeof(MMQReg); - if (is_predicated) { - TCGv cmp = tcg_temp_new(); - TCGLabel *label_skip = gen_new_label(); - - tcg_gen_andi_tl(cmp, hex_QRegs_updated, 1 << rnum); - tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip); - tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); - gen_set_label(label_skip); - } else { - tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); - } + tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); } if (pkt_has_hvx_store(ctx->pkt)) { @@ -1129,10 +1087,6 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, llsc_val), "llsc_val"); hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env, offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); - hex_VRegs_updated = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, VRegs_updated), "VRegs_updated"); - hex_QRegs_updated = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, QRegs_updated), "QRegs_updated"); for (i = 0; i < STORES_MAX; i++) { snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); hex_store_addr[i] = tcg_global_mem_new(cpu_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 6e59a90c41..db832b0f88 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -49,7 +49,6 @@ typedef struct DisasContext { int tmp_vregs_idx; int tmp_vregs_num[VECTOR_TEMPS_MAX]; int vreg_log[NUM_VREGS]; - bool vreg_is_predicated[NUM_VREGS]; int vreg_log_idx; DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS); DECLARE_BITMAP(vregs_updated, NUM_VREGS); @@ -57,7 +56,6 @@ typedef struct DisasContext { DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS); DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS); int qreg_log[NUM_QREGS]; - bool qreg_is_predicated[NUM_QREGS]; int qreg_log_idx; bool pre_commit; TCGCond branch_cond; @@ -111,9 +109,11 @@ static inline void ctx_log_vreg_write(DisasContext *ctx, bool is_predicated) { if (type != EXT_TMP) { - ctx->vreg_log[ctx->vreg_log_idx] = rnum; - ctx->vreg_is_predicated[ctx->vreg_log_idx] = is_predicated; - ctx->vreg_log_idx++; + if (!test_bit(rnum, ctx->vregs_updated)) { + ctx->vreg_log[ctx->vreg_log_idx] = rnum; + ctx->vreg_log_idx++; + set_bit(rnum, ctx->vregs_updated); + } set_bit(rnum, ctx->vregs_updated); if (is_predicated) { @@ -140,10 +140,9 @@ static inline void ctx_log_vreg_write_pair(DisasContext *ctx, } static inline void ctx_log_qreg_write(DisasContext *ctx, - int rnum, bool is_predicated) + int rnum) { ctx->qreg_log[ctx->qreg_log_idx] = rnum; - ctx->qreg_is_predicated[ctx->qreg_log_idx] = is_predicated; ctx->qreg_log_idx++; } @@ -164,8 +163,6 @@ extern TCGv hex_dczero_addr; extern TCGv hex_llsc_addr; extern TCGv hex_llsc_val; extern TCGv_i64 hex_llsc_val_i64; -extern TCGv hex_VRegs_updated; -extern TCGv hex_QRegs_updated; extern TCGv hex_vstore_addr[VSTORES_MAX]; extern TCGv hex_vstore_size[VSTORES_MAX]; extern TCGv hex_vstore_pending[VSTORES_MAX]; -- 2.11.0