From 1b15a97d4cf99efdd60e60e0bfd2d185174ab0eb Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Fri, 13 Aug 2021 17:11:52 +0100
Subject: [PATCH] target/arm: Implement MVE shift-by-scalar

Implement the MVE instructions which perform shifts by a scalar.
These are VSHL T2, VRSHL T2, VQSHL T1 and VQRSHL T2.  They take the
shift amount in a general purpose register and shift every element
in the vector by that amount.

Mostly we can reuse the helper functions for shift-by-immediate; we
do need two new helpers for VQRSHL.

Signed-off-by: Peter Maydell
Reviewed-by: Richard Henderson
---
 target/arm/helper-mve.h    |  8 ++++++++
 target/arm/mve.decode      | 23 ++++++++++++++++++++---
 target/arm/mve_helper.c    |  2 ++
 target/arm/translate-mve.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index 715b1bbd01..0ee5ea3cab 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -414,6 +414,14 @@ DEF_HELPER_FLAGS_4(mve_vrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(mve_vqrshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_4(mve_vshllbsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vshllbsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(mve_vshllbub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index 226b74790b..eb26b103d1 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -39,6 +39,7 @@
 &viwdup qd rn rm size imm
 &vcmp qm qn size mask
 &vcmp_scalar qn rm size mask
+&shl_scalar qda rm size
 
 @vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
 # Note that both Rn and Qd are 3 bits only (no D bit)
@@ -88,6 +89,8 @@
 @2_shr_w .... .... .. 1 ..... .... .... .... .... &2shift qd=%qd qm=%qm \
          size=2 shift=%rshift_i5
 
+@shl_scalar .... .... .... size:2 .. .... .... .... rm:4 &shl_scalar qda=%qd
+
 # Vector comparison; 4-bit Qm but 3-bit Qn
 %mask_22_13              22:1 13:3
 @vcmp    .... .... .. size:2 qn:3 . .... .... .... .... &vcmp qm=%qm mask=%mask_22_13
@@ -320,7 +323,23 @@ VRMLSLDAVH       1111 1110 1 ... ... 0 ... x:1 1110 . 0 a:1 0 ... 1 @vmlaldav_nosz
 
 VADD_scalar      1110 1110 0 . .. ... 1 ... 0 1111 . 100 .... @2scalar
 VSUB_scalar      1110 1110 0 . .. ... 1 ... 1 1111 . 100 .... @2scalar
-VMUL_scalar      1110 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
+
+{
+  VSHL_S_scalar   1110 1110 0 . 11 .. 01 ... 1 1110 0110 .... @shl_scalar
+  VRSHL_S_scalar  1110 1110 0 . 11 .. 11 ... 1 1110 0110 .... @shl_scalar
+  VQSHL_S_scalar  1110 1110 0 . 11 .. 01 ... 1 1110 1110 .... @shl_scalar
+  VQRSHL_S_scalar 1110 1110 0 . 11 .. 11 ... 1 1110 1110 .... @shl_scalar
+  VMUL_scalar     1110 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
+}
+
+{
+  VSHL_U_scalar   1111 1110 0 . 11 .. 01 ... 1 1110 0110 .... @shl_scalar
+  VRSHL_U_scalar  1111 1110 0 . 11 .. 11 ... 1 1110 0110 .... @shl_scalar
+  VQSHL_U_scalar  1111 1110 0 . 11 .. 01 ... 1 1110 1110 .... @shl_scalar
+  VQRSHL_U_scalar 1111 1110 0 . 11 .. 11 ... 1 1110 1110 .... @shl_scalar
+  VBRSR           1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
+}
+
 VHADD_S_scalar   1110 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
 VHADD_U_scalar   1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
 VHSUB_S_scalar   1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
@@ -340,8 +359,6 @@ VHSUB_U_scalar   1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
                   size=%size_28
 }
 
-VBRSR            1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
-
 VQDMULH_scalar   1110 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
 VQRDMULH_scalar  1111 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
 
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index ab02a1e60f..ac608fc524 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -1334,6 +1334,8 @@ DO_2SHIFT_SAT_S(vqshli_s, DO_SQSHL_OP)
 DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP)
 DO_2SHIFT_U(vrshli_u, DO_VRSHLU)
 DO_2SHIFT_S(vrshli_s, DO_VRSHLS)
+DO_2SHIFT_SAT_U(vqrshli_u, DO_UQRSHL_OP)
+DO_2SHIFT_SAT_S(vqrshli_s, DO_SQRSHL_OP)
 
 /* Shift-and-insert; we always work with 64 bits at a time */
 #define DO_2SHIFT_INSERT(OP, ESIZE, SHIFTFN, MASKFN) \
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index b56c91db2a..44731fc4eb 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -1003,6 +1003,52 @@ DO_2SHIFT(VRSHRI_U, vrshli_u, true)
 DO_2SHIFT(VSRI, vsri, false)
 DO_2SHIFT(VSLI, vsli, false)
 
+static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
+                             MVEGenTwoOpShiftFn *fn)
+{
+    TCGv_ptr qda;
+    TCGv_i32 rm;
+
+    if (!dc_isar_feature(aa32_mve, s) ||
+        !mve_check_qreg_bank(s, a->qda) ||
+        a->rm == 13 || a->rm == 15 || !fn) {
+        /* Rm cases are UNPREDICTABLE */
+        return false;
+    }
+    if (!mve_eci_check(s) || !vfp_access_check(s)) {
+        return true;
+    }
+
+    qda = mve_qreg_ptr(a->qda);
+    rm = load_reg(s, a->rm);
+    fn(cpu_env, qda, qda, rm);
+    tcg_temp_free_ptr(qda);
+    tcg_temp_free_i32(rm);
+    mve_update_eci(s);
+    return true;
+}
+
+#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
+    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
+    {                                                                    \
+        static MVEGenTwoOpShiftFn * const fns[] = {                      \
+            gen_helper_mve_##FN##b,                                      \
+            gen_helper_mve_##FN##h,                                      \
+            gen_helper_mve_##FN##w,                                      \
+            NULL,                                                        \
+        };                                                               \
+        return do_2shift_scalar(s, a, fns[a->size]);                     \
+    }
+
+DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
+DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
+DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
+DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
+DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
+DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
+DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
+DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
+
 #define DO_VSHLL(INSN, FN) \
     static bool trans_##INSN(DisasContext *s, arg_2shift *a)   \
     { \
-- 
2.11.0
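
As a rough illustration of the semantics described in the commit message (not part of the patch): every lane of the destination vector is shifted by the same signed count taken from a general-purpose register, with a negative count shifting right, following the usual MVE register-shift convention. The stand-alone C sketch below models only the plain non-saturating, non-rounding byte-lane case; the function name is invented for illustration, and saturation, rounding, out-of-range counts and MVE predication/ECI handling, which the real helpers in mve_helper.c take care of, are deliberately omitted.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative model only: one 16-lane vector of int8_t elements, each
 * shifted by the same signed count taken from a general-purpose register.
 * A negative count shifts right.  Assumes the count is in [-7, 7].
 */
static void vshl_s8_by_scalar_model(int8_t qda[16], int32_t rm)
{
    int8_t count = (int8_t)rm;   /* signed count from the bottom byte of Rm */

    for (int i = 0; i < 16; i++) {
        int v = qda[i];
        if (count >= 0) {
            /* shift left; cast avoids shifting a negative int */
            qda[i] = (int8_t)((uint32_t)v << count);
        } else {
            /* negative count: arithmetic shift right */
            qda[i] = (int8_t)(v >> -count);
        }
    }
}

int main(void)
{
    int8_t q[16] = { 1, 2, 3, 4, -4, -8, 16, 32,
                     1, 2, 3, 4, -4, -8, 16, 32 };

    vshl_s8_by_scalar_model(q, 2);   /* every lane shifted left by 2 */

    for (int i = 0; i < 16; i++) {
        printf("%d ", q[i]);
    }
    printf("\n");
    return 0;
}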