X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=sim%2Frx%2Frx.c;fp=sim%2Frx%2Frx.c;h=6d9d9d572e04093498a6ee976bb4cdec1042f4ca;hb=cbad6d5ba66a94967eaf14df64463a4abca700d6;hp=f1ce809014df8a2b7d761cb8167059d2996303de;hpb=b110964dfb2d0779466e4e604983d3afa83993a4;p=pf3gnuchains%2Fpf3gnuchains4x.git diff --git a/sim/rx/rx.c b/sim/rx/rx.c index f1ce809014..6d9d9d572e 100644 --- a/sim/rx/rx.c +++ b/sim/rx/rx.c @@ -1,6 +1,6 @@ /* rx.c --- opcode semantics for stand-alone RX simulator. -Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. +Copyright (C) 2008-2012 Free Software Foundation, Inc. Contributed by Red Hat, Inc. This file is part of the GNU simulators. @@ -18,6 +18,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ +#include "config.h" #include #include #include @@ -29,12 +30,301 @@ along with this program. If not, see . */ #include "syscalls.h" #include "fpu.h" #include "err.h" +#include "misc.h" + +#ifdef CYCLE_STATS +static const char * id_names[] = { + "RXO_unknown", + "RXO_mov", /* d = s (signed) */ + "RXO_movbi", /* d = [s,s2] (signed) */ + "RXO_movbir", /* [s,s2] = d (signed) */ + "RXO_pushm", /* s..s2 */ + "RXO_popm", /* s..s2 */ + "RXO_xchg", /* s <-> d */ + "RXO_stcc", /* d = s if cond(s2) */ + "RXO_rtsd", /* rtsd, 1=imm, 2-0 = reg if reg type */ + + /* These are all either d OP= s or, if s2 is set, d = s OP s2. Note + that d may be "None". */ + "RXO_and", + "RXO_or", + "RXO_xor", + "RXO_add", + "RXO_sub", + "RXO_mul", + "RXO_div", + "RXO_divu", + "RXO_shll", + "RXO_shar", + "RXO_shlr", + + "RXO_adc", /* d = d + s + carry */ + "RXO_sbb", /* d = d - s - ~carry */ + "RXO_abs", /* d = |s| */ + "RXO_max", /* d = max(d,s) */ + "RXO_min", /* d = min(d,s) */ + "RXO_emul", /* d:64 = d:32 * s */ + "RXO_emulu", /* d:64 = d:32 * s (unsigned) */ + + "RXO_rolc", /* d <<= 1 through carry */ + "RXO_rorc", /* d >>= 1 through carry*/ + "RXO_rotl", /* d <<= #s without carry */ + "RXO_rotr", /* d >>= #s without carry*/ + "RXO_revw", /* d = revw(s) */ + "RXO_revl", /* d = revl(s) */ + "RXO_branch", /* pc = d if cond(s) */ + "RXO_branchrel",/* pc += d if cond(s) */ + "RXO_jsr", /* pc = d */ + "RXO_jsrrel", /* pc += d */ + "RXO_rts", + "RXO_nop", + "RXO_nop2", + "RXO_nop3", + + "RXO_scmpu", + "RXO_smovu", + "RXO_smovb", + "RXO_suntil", + "RXO_swhile", + "RXO_smovf", + "RXO_sstr", + + "RXO_rmpa", + "RXO_mulhi", + "RXO_mullo", + "RXO_machi", + "RXO_maclo", + "RXO_mvtachi", + "RXO_mvtaclo", + "RXO_mvfachi", + "RXO_mvfacmi", + "RXO_mvfaclo", + "RXO_racw", + + "RXO_sat", /* sat(d) */ + "RXO_satr", + + "RXO_fadd", /* d op= s */ + "RXO_fcmp", + "RXO_fsub", + "RXO_ftoi", + "RXO_fmul", + "RXO_fdiv", + "RXO_round", + "RXO_itof", + + "RXO_bset", /* d |= (1< = cond(s2) */ + + "RXO_clrpsw", /* flag index in d */ + "RXO_setpsw", /* flag index in d */ + "RXO_mvtipl", /* new IPL in s */ + + "RXO_rtfi", + "RXO_rte", + "RXO_rtd", /* undocumented */ + "RXO_brk", + "RXO_dbt", /* undocumented */ + "RXO_int", /* vector id in s */ + "RXO_stop", + "RXO_wait", + + "RXO_sccnd", /* d = cond(s) ? 1 : 0 */ +}; + +static const char * optype_names[] = { + " - ", + "#Imm", /* #addend */ + " Rn ", /* Rn */ + "[Rn]", /* [Rn + addend] */ + "Ps++", /* [Rn+] */ + "--Pr", /* [-Rn] */ + " cc ", /* eq, gtu, etc */ + "Flag", /* [UIOSZC] */ + "RbRi" /* [Rb + scale * Ri] */ +}; + +#define N_RXO (sizeof(id_names)/sizeof(id_names[0])) +#define N_RXT (sizeof(optype_names)/sizeof(optype_names[0])) +#define N_MAP 30 + +static unsigned long long benchmark_start_cycle; +static unsigned long long benchmark_end_cycle; + +static int op_cache[N_RXT][N_RXT][N_RXT]; +static int op_cache_rev[N_MAP]; +static int op_cache_idx = 0; + +static int +op_lookup (int a, int b, int c) +{ + if (op_cache[a][b][c]) + return op_cache[a][b][c]; + op_cache_idx ++; + if (op_cache_idx >= N_MAP) + { + printf("op_cache_idx exceeds %d\n", N_MAP); + exit(1); + } + op_cache[a][b][c] = op_cache_idx; + op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c; + return op_cache_idx; +} + +static char * +op_cache_string (int map) +{ + static int ci; + static char cb[5][20]; + int a, b, c; + + map = op_cache_rev[map]; + a = (map >> 8) & 15; + b = (map >> 4) & 15; + c = (map >> 0) & 15; + ci = (ci + 1) % 5; + sprintf(cb[ci], "%s %s %s", optype_names[a], optype_names[b], optype_names[c]); + return cb[ci]; +} + +static unsigned long long cycles_per_id[N_RXO][N_MAP]; +static unsigned long long times_per_id[N_RXO][N_MAP]; +static unsigned long long memory_stalls; +static unsigned long long register_stalls; +static unsigned long long branch_stalls; +static unsigned long long branch_alignment_stalls; +static unsigned long long fast_returns; + +static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP]; +static int prev_opcode_id = RXO_unknown; +static int po0; -#define tprintf if (trace) printf +#define STATS(x) x + +#else +#define STATS(x) +#endif /* CYCLE_STATS */ + + +#ifdef CYCLE_ACCURATE + +static int new_rt = -1; + +/* Number of cycles to add if an insn spans an 8-byte boundary. */ +static int branch_alignment_penalty = 0; + +#endif + +static int running_benchmark = 1; + +#define tprintf if (trace && running_benchmark) printf jmp_buf decode_jmp_buf; unsigned int rx_cycles = 0; +#ifdef CYCLE_ACCURATE +/* If nonzero, memory was read at some point and cycle latency might + take effect. */ +static int memory_source = 0; +/* If nonzero, memory was written and extra cycles might be + needed. */ +static int memory_dest = 0; + +static void +cycles (int throughput) +{ + tprintf("%d cycles\n", throughput); + regs.cycle_count += throughput; +} + +/* Number of execution (E) cycles the op uses. For memory sources, we + include the load micro-op stall as two extra E cycles. */ +#define E(c) cycles (memory_source ? c + 2 : c) +#define E1 cycles (1) +#define E2 cycles (2) +#define EBIT cycles (memory_source ? 2 : 1) + +/* Check to see if a read latency must be applied for a given register. */ +#define RL(r) \ + if (regs.rt == r ) \ + { \ + tprintf("register %d load stall\n", r); \ + regs.cycle_count ++; \ + STATS(register_stalls ++); \ + regs.rt = -1; \ + } + +#define RLD(r) \ + if (memory_source) \ + { \ + tprintf ("Rt now %d\n", r); \ + new_rt = r; \ + } + +static int +lsb_count (unsigned long v, int is_signed) +{ + int i, lsb; + if (is_signed && (v & 0x80000000U)) + v = (unsigned long)(long)(-v); + for (i=31; i>=0; i--) + if (v & (1 << i)) + { + /* v is 0..31, we want 1=1-2, 2=3-4, 3=5-6, etc. */ + lsb = (i + 2) / 2; + return lsb; + } + return 0; +} + +static int +divu_cycles(unsigned long num, unsigned long den) +{ + int nb = lsb_count (num, 0); + int db = lsb_count (den, 0); + int rv; + + if (nb < db) + rv = 2; + else + rv = 3 + nb - db; + E (rv); + return rv; +} + +static int +div_cycles(long num, long den) +{ + int nb = lsb_count ((unsigned long)num, 1); + int db = lsb_count ((unsigned long)den, 1); + int rv; + + if (nb < db) + rv = 3; + else + rv = 5 + nb - db; + E (rv); + return rv; +} + +#else /* !CYCLE_ACCURATE */ + +#define cycles(t) +#define E(c) +#define E1 +#define E2 +#define EBIT +#define RL(r) +#define RLD(r) + +#define divu_cycles(n,d) +#define div_cycles(n,d) + +#endif /* else CYCLE_ACCURATE */ + static int size2bytes[] = { 4, 1, 1, 1, 2, 2, 2, 3, 4 }; @@ -53,30 +343,50 @@ _rx_abort (const char *file, int line) abort(); } -static int -rx_get_byte (void *vdata) +static unsigned char *get_byte_base; +static RX_Opcode_Decoded **decode_cache_base; +static SI get_byte_page; + +void +reset_decoder (void) { - int saved_trace = trace; - unsigned char rv; + get_byte_base = 0; + decode_cache_base = 0; + get_byte_page = 0; +} - if (trace == 1) - trace = 0; +static inline void +maybe_get_mem_page (SI tpc) +{ + if (((tpc ^ get_byte_page) & NONPAGE_MASK) || enable_counting) + { + get_byte_page = tpc & NONPAGE_MASK; + get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page; + decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page; + } +} +/* This gets called a *lot* so optimize it. */ +static int +rx_get_byte (void *vdata) +{ RX_Data *rx_data = (RX_Data *)vdata; + SI tpc = rx_data->dpc; + + /* See load.c for an explanation of this. */ if (rx_big_endian) - /* See load.c for an explanation of this. */ - rv = mem_get_pc (rx_data->dpc ^ 3); - else - rv = mem_get_pc (rx_data->dpc); + tpc ^= 3; + + maybe_get_mem_page (tpc); + rx_data->dpc ++; - trace = saved_trace; - return rv; + return get_byte_base [tpc]; } static int -get_op (RX_Opcode_Decoded *rd, int i) +get_op (const RX_Opcode_Decoded *rd, int i) { - RX_Opcode_Operand *o = rd->op + i; + const RX_Opcode_Operand *o = rd->op + i; int addr, rv = 0; switch (o->type) @@ -88,6 +398,7 @@ get_op (RX_Opcode_Decoded *rd, int i) return o->addend; case RX_Operand_Register: /* Rn */ + RL (o->reg); rv = get_reg (o->reg); break; @@ -96,8 +407,30 @@ get_op (RX_Opcode_Decoded *rd, int i) /* fall through */ case RX_Operand_Postinc: /* [Rn+] */ case RX_Operand_Indirect: /* [Rn + addend] */ + case RX_Operand_TwoReg: /* [Rn + scale * R2] */ +#ifdef CYCLE_ACCURATE + RL (o->reg); + if (o->type == RX_Operand_TwoReg) + RL (rd->op[2].reg); + regs.rt = -1; + if (regs.m2m == M2M_BOTH) + { + tprintf("src memory stall\n"); +#ifdef CYCLE_STATS + memory_stalls ++; +#endif + regs.cycle_count ++; + regs.m2m = 0; + } + + memory_source = 1; +#endif + + if (o->type == RX_Operand_TwoReg) + addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg); + else + addr = get_reg (o->reg) + o->addend; - addr = get_reg (o->reg) + o->addend; switch (o->size) { case RX_AnySize: @@ -178,9 +511,9 @@ get_op (RX_Opcode_Decoded *rd, int i) } static void -put_op (RX_Opcode_Decoded *rd, int i, int v) +put_op (const RX_Opcode_Decoded *rd, int i, int v) { - RX_Opcode_Operand *o = rd->op + i; + const RX_Opcode_Operand *o = rd->op + i; int addr; switch (o->size) @@ -234,6 +567,7 @@ put_op (RX_Opcode_Decoded *rd, int i, int v) case RX_Operand_Register: /* Rn */ put_reg (o->reg, v); + RLD (o->reg); break; case RX_Operand_Predec: /* [-Rn] */ @@ -241,8 +575,26 @@ put_op (RX_Opcode_Decoded *rd, int i, int v) /* fall through */ case RX_Operand_Postinc: /* [Rn+] */ case RX_Operand_Indirect: /* [Rn + addend] */ + case RX_Operand_TwoReg: /* [Rn + scale * R2] */ + +#ifdef CYCLE_ACCURATE + if (regs.m2m == M2M_BOTH) + { + tprintf("dst memory stall\n"); + regs.cycle_count ++; +#ifdef CYCLE_STATS + memory_stalls ++; +#endif + regs.m2m = 0; + } + memory_dest = 1; +#endif + + if (o->type == RX_Operand_TwoReg) + addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg); + else + addr = get_reg (o->reg) + o->addend; - addr = get_reg (o->reg) + o->addend; switch (o->size) { case RX_AnySize: @@ -283,19 +635,19 @@ put_op (RX_Opcode_Decoded *rd, int i, int v) } } -#define PD(x) put_op (&opcode, 0, x) -#define PS(x) put_op (&opcode, 1, x) -#define PS2(x) put_op (&opcode, 2, x) -#define GD() get_op (&opcode, 0) -#define GS() get_op (&opcode, 1) -#define GS2() get_op (&opcode, 2) -#define DSZ() size2bytes[opcode.op[0].size] -#define SSZ() size2bytes[opcode.op[0].size] -#define S2SZ() size2bytes[opcode.op[0].size] +#define PD(x) put_op (opcode, 0, x) +#define PS(x) put_op (opcode, 1, x) +#define PS2(x) put_op (opcode, 2, x) +#define GD() get_op (opcode, 0) +#define GS() get_op (opcode, 1) +#define GS2() get_op (opcode, 2) +#define DSZ() size2bytes[opcode->op[0].size] +#define SSZ() size2bytes[opcode->op[0].size] +#define S2SZ() size2bytes[opcode->op[0].size] /* "Universal" sources. */ -#define US1() ((opcode.op[2].type == RX_Operand_None) ? GD() : GS()) -#define US2() ((opcode.op[2].type == RX_Operand_None) ? GS() : GS2()) +#define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS()) +#define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2()) static void push(int val) @@ -345,8 +697,8 @@ poppc() #define MATH_OP(vop,c) \ { \ - uma = US1(); \ umb = US2(); \ + uma = US1(); \ ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \ tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \ ma = sign_ext (uma, DSZ() * 8); \ @@ -355,23 +707,25 @@ poppc() tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \ set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \ PD (sll); \ + E (1); \ } #define LOGIC_OP(vop) \ { \ - ma = US1(); \ mb = US2(); \ + ma = US1(); \ v = ma vop mb; \ tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \ set_sz (v, DSZ()); \ PD(v); \ + E (1); \ } #define SHIFT_OP(val, type, count, OP, carry_mask) \ { \ int i, c=0; \ - val = (type)US1(); \ count = US2(); \ + val = (type)US1(); \ tprintf("%lld " #OP " %d\n", val, count); \ for (i = 0; i < count; i ++) \ { \ @@ -443,8 +797,8 @@ fop_fsub (fp_t s1, fp_t s2, fp_t *d) int do_store; \ fp_t fa, fb, fc; \ FPCLEAR(); \ - fa = GD (); \ fb = GS (); \ + fa = GD (); \ do_store = fop_##func (fa, fb, &fc); \ tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \ FPCHECK(); \ @@ -549,6 +903,23 @@ do_fp_exception (unsigned long opcode_pc) return RX_MAKE_STEPPED (); } +static int +op_is_memory (const RX_Opcode_Decoded *rd, int i) +{ + switch (rd->op[i].type) + { + case RX_Operand_Predec: + case RX_Operand_Postinc: + case RX_Operand_Indirect: + return 1; + default: + return 0; + } +} +#define OM(i) op_is_memory (opcode, i) + +#define DO_RETURN(x) { longjmp (decode_jmp_buf, x); } + int decode_opcode () { @@ -559,23 +930,67 @@ decode_opcode () long long sll; unsigned long opcode_pc; RX_Data rx_data; - RX_Opcode_Decoded opcode; - int rv; - - if ((rv = setjmp (decode_jmp_buf))) - return rv; + const RX_Opcode_Decoded *opcode; +#ifdef CYCLE_STATS + unsigned long long prev_cycle_count; +#endif +#ifdef CYCLE_ACCURATE + unsigned int tx; +#endif + +#ifdef CYCLE_STATS + prev_cycle_count = regs.cycle_count; +#endif + +#ifdef CYCLE_ACCURATE + memory_source = 0; + memory_dest = 0; +#endif rx_cycles ++; - rx_data.dpc = opcode_pc = regs.r_pc; - opcode_size = rx_decode_opcode (opcode_pc, &opcode, rx_get_byte, &rx_data); + maybe_get_mem_page (regs.r_pc); + + opcode_pc = regs.r_pc; + + /* Note that we don't word-swap this point, there's no point. */ + if (decode_cache_base[opcode_pc] == NULL) + { + RX_Opcode_Decoded *opcode_w; + rx_data.dpc = opcode_pc; + opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded)); + opcode_size = rx_decode_opcode (opcode_pc, opcode_w, + rx_get_byte, &rx_data); + opcode = opcode_w; + } + else + { + opcode = decode_cache_base[opcode_pc]; + opcode_size = opcode->n_bytes; + } + +#ifdef CYCLE_ACCURATE + if (branch_alignment_penalty) + { + if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7) + { + tprintf("1 cycle branch alignment penalty\n"); + cycles (branch_alignment_penalty); +#ifdef CYCLE_STATS + branch_alignment_stalls ++; +#endif + } + branch_alignment_penalty = 0; + } +#endif + regs.r_pc += opcode_size; - rx_flagmask = opcode.flags_s; - rx_flagand = ~(int)opcode.flags_0; - rx_flagor = opcode.flags_1; + rx_flagmask = opcode->flags_s; + rx_flagand = ~(int)opcode->flags_0; + rx_flagor = opcode->flags_1; - switch (opcode.id) + switch (opcode->id) { case RXO_abs: sll = GS (); @@ -585,6 +1000,7 @@ decode_opcode () tprintf("%lld\n", sll); PD (sll); set_osz (sll, 4); + E (1); break; case RXO_adc: @@ -602,18 +1018,19 @@ decode_opcode () case RXO_bclr: ma = GD (); mb = GS (); - if (opcode.op[0].type == RX_Operand_Register) + if (opcode->op[0].type == RX_Operand_Register) mb &= 0x1f; else mb &= 0x07; ma &= ~(1 << mb); PD (ma); + EBIT; break; case RXO_bmcc: ma = GD (); mb = GS (); - if (opcode.op[0].type == RX_Operand_Register) + if (opcode->op[0].type == RX_Operand_Register) mb &= 0x1f; else mb &= 0x07; @@ -622,75 +1039,132 @@ decode_opcode () else ma &= ~(1 << mb); PD (ma); + EBIT; break; case RXO_bnot: ma = GD (); mb = GS (); - if (opcode.op[0].type == RX_Operand_Register) + if (opcode->op[0].type == RX_Operand_Register) mb &= 0x1f; else mb &= 0x07; ma ^= (1 << mb); PD (ma); + EBIT; break; case RXO_branch: - if (GS()) - regs.r_pc = GD(); + if (opcode->op[1].type == RX_Operand_None || GS()) + { +#ifdef CYCLE_ACCURATE + SI old_pc = regs.r_pc; + int delta; +#endif + regs.r_pc = GD(); +#ifdef CYCLE_ACCURATE + delta = regs.r_pc - old_pc; + if (delta >= 0 && delta < 16 + && opcode_size > 1) + { + tprintf("near forward branch bonus\n"); + cycles (2); + } + else + { + cycles (3); + branch_alignment_penalty = 1; + } +#ifdef CYCLE_STATS + branch_stalls ++; +#endif +#endif + } +#ifdef CYCLE_ACCURATE + else + cycles (1); +#endif break; case RXO_branchrel: - if (GS()) - regs.r_pc += GD(); + if (opcode->op[1].type == RX_Operand_None || GS()) + { + int delta = GD(); + regs.r_pc = opcode_pc + delta; +#ifdef CYCLE_ACCURATE + /* Note: specs say 3, chip says 2. */ + if (delta >= 0 && delta < 16 + && opcode_size > 1) + { + tprintf("near forward branch bonus\n"); + cycles (2); + } + else + { + cycles (3); + branch_alignment_penalty = 1; + } +#ifdef CYCLE_STATS + branch_stalls ++; +#endif +#endif + } +#ifdef CYCLE_ACCURATE + else + cycles (1); +#endif break; case RXO_brk: { int old_psw = regs.r_psw; if (rx_in_gdb) - return RX_MAKE_HIT_BREAK (); + DO_RETURN (RX_MAKE_HIT_BREAK ()); if (regs.r_intb == 0) { tprintf("BREAK hit, no vector table.\n"); - return RX_MAKE_EXITED(1); + DO_RETURN (RX_MAKE_EXITED(1)); } regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM); pushpc (old_psw); pushpc (regs.r_pc); regs.r_pc = mem_get_si (regs.r_intb); + cycles(6); } break; case RXO_bset: ma = GD (); mb = GS (); - if (opcode.op[0].type == RX_Operand_Register) + if (opcode->op[0].type == RX_Operand_Register) mb &= 0x1f; else mb &= 0x07; ma |= (1 << mb); PD (ma); + EBIT; break; case RXO_btst: ma = GS (); mb = GS2 (); - if (opcode.op[1].type == RX_Operand_Register) + if (opcode->op[1].type == RX_Operand_Register) mb &= 0x1f; else mb &= 0x07; umb = ma & (1 << mb); set_zc (! umb, umb); + EBIT; break; case RXO_clrpsw: - v = 1 << opcode.op[0].reg; + v = 1 << opcode->op[0].reg; if (FLAG_PM && (v == FLAGBIT_I || v == FLAGBIT_U)) break; regs.r_psw &= ~v; + cycles (1); break; case RXO_div: /* d = d / s */ @@ -701,6 +1175,7 @@ decode_opcode () { tprintf("#NAN\n"); set_flags (FLAGBIT_O, FLAGBIT_O); + cycles (3); } else { @@ -708,6 +1183,7 @@ decode_opcode () tprintf("%d\n", v); set_flags (FLAGBIT_O, 0); PD (v); + div_cycles (mb, ma); } break; @@ -719,6 +1195,7 @@ decode_opcode () { tprintf("#NAN\n"); set_flags (FLAGBIT_O, FLAGBIT_O); + cycles (2); } else { @@ -726,48 +1203,7 @@ decode_opcode () tprintf("%u\n", v); set_flags (FLAGBIT_O, 0); PD (v); - } - break; - - case RXO_ediv: - ma = GS(); - mb = GD(); - tprintf("%d / %d = ", mb, ma); - if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000)) - { - tprintf("#NAN\n"); - set_flags (FLAGBIT_O, FLAGBIT_O); - } - else - { - v = mb/ma; - mb = mb%ma; - tprintf("%d, rem %d\n", v, mb); - set_flags (FLAGBIT_O, 0); - PD (v); - opcode.op[0].reg ++; - PD (mb); - } - break; - - case RXO_edivu: - uma = GS(); - umb = GD(); - tprintf("%u / %u = ", umb, uma); - if (uma == 0) - { - tprintf("#NAN\n"); - set_flags (FLAGBIT_O, FLAGBIT_O); - } - else - { - v = umb/uma; - umb = umb%uma; - tprintf("%u, rem %u\n", v, umb); - set_flags (FLAGBIT_O, 0); - PD (v); - opcode.op[0].reg ++; - PD (umb); + divu_cycles (umb, uma); } break; @@ -776,9 +1212,9 @@ decode_opcode () mb = GS (); sll = (long long)ma * (long long)mb; tprintf("%d * %d = %lld\n", ma, mb, sll); - PD (sll); - opcode.op[0].reg ++; - PD (sll >> 32); + put_reg (opcode->op[0].reg, sll); + put_reg (opcode->op[0].reg + 1, sll >> 32); + E2; break; case RXO_emulu: @@ -786,13 +1222,14 @@ decode_opcode () umb = GS (); ll = (long long)uma * (long long)umb; tprintf("%#x * %#x = %#llx\n", uma, umb, ll); - PD (ll); - opcode.op[0].reg ++; - PD (ll >> 32); + put_reg (opcode->op[0].reg, ll); + put_reg (opcode->op[0].reg + 1, ll >> 32); + E2; break; case RXO_fadd: FLOAT_OP (fadd); + E (4); break; case RXO_fcmp: @@ -801,24 +1238,32 @@ decode_opcode () FPCLEAR (); rxfp_cmp (ma, mb); FPCHECK (); + E (1); break; case RXO_fdiv: FLOAT_OP (fdiv); + E (16); break; case RXO_fmul: FLOAT_OP (fmul); + E (3); break; case RXO_rtfi: PRIVILEDGED (); regs.r_psw = regs.r_bpsw; regs.r_pc = regs.r_bpc; +#ifdef CYCLE_ACCURATE + regs.fast_return = 0; + cycles(3); +#endif break; case RXO_fsub: FLOAT_OP (fsub); + E (4); break; case RXO_ftoi: @@ -829,13 +1274,16 @@ decode_opcode () PD (mb); tprintf("(int) %g = %d\n", int2float(ma), mb); set_sz (mb, 4); + E (2); break; case RXO_int: v = GS (); if (v == 255) { - return rx_syscall (regs.r[5]); + int rc = rx_syscall (regs.r[5]); + if (! RX_STEPPED (rc)) + DO_RETURN (rc); } else { @@ -845,6 +1293,7 @@ decode_opcode () pushpc (regs.r_pc); regs.r_pc = mem_get_si (regs.r_intb + 4 * v); } + cycles (6); break; case RXO_itof: @@ -855,51 +1304,91 @@ decode_opcode () tprintf("(float) %d = %x\n", ma, mb); PD (mb); set_sz (ma, 4); + E (2); break; case RXO_jsr: case RXO_jsrrel: - v = GD (); - pushpc (get_reg (pc)); - if (opcode.id == RXO_jsrrel) - v += regs.r_pc; - put_reg (pc, v); + { +#ifdef CYCLE_ACCURATE + int delta; + regs.m2m = 0; +#endif + v = GD (); +#ifdef CYCLE_ACCURATE + regs.link_register = regs.r_pc; +#endif + pushpc (get_reg (pc)); + if (opcode->id == RXO_jsrrel) + v += regs.r_pc; +#ifdef CYCLE_ACCURATE + delta = v - regs.r_pc; +#endif + put_reg (pc, v); +#ifdef CYCLE_ACCURATE + /* Note: docs say 3, chip says 2 */ + if (delta >= 0 && delta < 16) + { + tprintf ("near forward jsr bonus\n"); + cycles (2); + } + else + { + branch_alignment_penalty = 1; + cycles (3); + } + regs.fast_return = 1; +#endif + } break; case RXO_machi: ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16); ll <<= 16; put_reg64 (acc64, ll + regs.r_acc); + E1; break; case RXO_maclo: ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ()); ll <<= 16; put_reg64 (acc64, ll + regs.r_acc); + E1; break; case RXO_max: - ma = GD(); mb = GS(); + ma = GD(); if (ma > mb) PD (ma); else PD (mb); + E (1); break; case RXO_min: - ma = GD(); mb = GS(); + ma = GD(); if (ma < mb) PD (ma); else PD (mb); + E (1); break; case RXO_mov: v = GS (); - if (opcode.op[0].type == RX_Operand_Register - && opcode.op[0].reg == 16 /* PSW */) + + if (opcode->op[1].type == RX_Operand_Register + && opcode->op[1].reg == 17 /* PC */) + { + /* Special case. We want the address of the insn, not the + address of the next insn. */ + v = opcode_pc; + } + + if (opcode->op[0].type == RX_Operand_Register + && opcode->op[0].reg == 16 /* PSW */) { /* Special case, LDC and POPC can't ever modify PM. */ int pm = regs.r_psw & FLAGBIT_PM; @@ -914,86 +1403,109 @@ decode_opcode () if (FLAG_PM) { /* various things can't be changed in user mode. */ - if (opcode.op[0].type == RX_Operand_Register) - if (opcode.op[0].reg == 32) + if (opcode->op[0].type == RX_Operand_Register) + if (opcode->op[0].reg == 32) { v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL); v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL); } - if (opcode.op[0].reg == 34 /* ISP */ - || opcode.op[0].reg == 37 /* BPSW */ - || opcode.op[0].reg == 39 /* INTB */ - || opcode.op[0].reg == 38 /* VCT */) + if (opcode->op[0].reg == 34 /* ISP */ + || opcode->op[0].reg == 37 /* BPSW */ + || opcode->op[0].reg == 39 /* INTB */ + || opcode->op[0].reg == 38 /* VCT */) /* These are ignored. */ break; } + if (OM(0) && OM(1)) + cycles (2); + else + cycles (1); + PD (v); + +#ifdef CYCLE_ACCURATE + if ((opcode->op[0].type == RX_Operand_Predec + && opcode->op[1].type == RX_Operand_Register) + || (opcode->op[0].type == RX_Operand_Postinc + && opcode->op[1].type == RX_Operand_Register)) + { + /* Special case: push reg doesn't cause a memory stall. */ + memory_dest = 0; + tprintf("push special case\n"); + } +#endif + set_sz (v, DSZ()); break; case RXO_movbi: - /* We cheat to save on code duplication. */ - regs.r_temp = (get_reg (opcode.op[1].reg) * size2bytes[opcode.size] - + get_reg (opcode.op[2].reg)); - opcode.op[1].reg = r_temp_idx; - opcode.op[1].type = RX_Operand_Indirect; - opcode.op[1].addend = 0; PD (GS ()); + cycles (1); break; case RXO_movbir: - /* We cheat to save on code duplication. */ - regs.r_temp = (get_reg (opcode.op[1].reg) * size2bytes[opcode.size] - + get_reg (opcode.op[2].reg)); - opcode.op[1].reg = r_temp_idx; - opcode.op[1].type = RX_Operand_Indirect; - opcode.op[1].addend = 0; PS (GD ()); + cycles (1); break; case RXO_mul: - ll = (unsigned long long) US1() * (unsigned long long) US2(); + v = US2 (); + ll = (unsigned long long) US1() * (unsigned long long) v; PD(ll); + E (1); break; case RXO_mulhi: - ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16); + v = GS2 (); + ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v >> 16); ll <<= 16; put_reg64 (acc64, ll); + E1; break; case RXO_mullo: - ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ()); + v = GS2 (); + ll = (long long)(signed short)(GS()) * (long long)(signed short)(v); ll <<= 16; put_reg64 (acc64, ll); + E1; break; case RXO_mvfachi: PD (get_reg (acchi)); + E1; break; case RXO_mvfaclo: PD (get_reg (acclo)); + E1; break; case RXO_mvfacmi: PD (get_reg (accmi)); + E1; break; case RXO_mvtachi: put_reg (acchi, GS ()); + E1; break; case RXO_mvtaclo: put_reg (acclo, GS ()); + E1; break; case RXO_mvtipl: regs.r_psw &= ~ FLAGBITS_IPL; regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL; + E1; break; case RXO_nop: + case RXO_nop2: + case RXO_nop3: + E1; break; case RXO_or: @@ -1002,32 +1514,36 @@ decode_opcode () case RXO_popm: /* POPM cannot pop R0 (sp). */ - if (opcode.op[1].reg == 0 || opcode.op[2].reg == 0) + if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0) EXCEPTION (EX_UNDEFINED); - if (opcode.op[1].reg >= opcode.op[2].reg) + if (opcode->op[1].reg >= opcode->op[2].reg) { regs.r_pc = opcode_pc; - return RX_MAKE_STOPPED (SIGILL); + DO_RETURN (RX_MAKE_STOPPED (SIGILL)); + } + for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v++) + { + cycles (1); + RLD (v); + put_reg (v, pop ()); } - for (v = opcode.op[1].reg; v <= opcode.op[2].reg; v++) - put_reg (v, pop ()); - break; - - case RXO_pusha: - push (get_reg (opcode.op[1].reg) + opcode.op[1].addend); break; case RXO_pushm: /* PUSHM cannot push R0 (sp). */ - if (opcode.op[1].reg == 0 || opcode.op[2].reg == 0) + if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0) EXCEPTION (EX_UNDEFINED); - if (opcode.op[1].reg >= opcode.op[2].reg) + if (opcode->op[1].reg >= opcode->op[2].reg) { regs.r_pc = opcode_pc; return RX_MAKE_STOPPED (SIGILL); } - for (v = opcode.op[2].reg; v >= opcode.op[1].reg; v--) - push (get_reg (v)); + for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v--) + { + RL (v); + push (get_reg (v)); + } + cycles (opcode->op[2].reg - opcode->op[1].reg + 1); break; case RXO_racw: @@ -1040,6 +1556,7 @@ decode_opcode () else ll &= 0xffffffff00000000ULL; put_reg64 (acc64, ll); + E1; break; case RXO_rte: @@ -1048,6 +1565,10 @@ decode_opcode () regs.r_psw = poppc (); if (FLAG_PM) regs.r_psw |= FLAGBIT_U; +#ifdef CYCLE_ACCURATE + regs.fast_return = 0; + cycles (6); +#endif break; case RXO_revl: @@ -1057,6 +1578,7 @@ decode_opcode () | ((uma << 8) & 0xff0000) | ((uma << 24) & 0xff000000UL)); PD (umb); + E1; break; case RXO_revw: @@ -1064,14 +1586,21 @@ decode_opcode () umb = (((uma >> 8) & 0x00ff00ff) | ((uma << 8) & 0xff00ff00UL)); PD (umb); + E1; break; case RXO_rmpa: + RL(4); + RL(5); +#ifdef CYCLE_ACCURATE + tx = regs.r[3]; +#endif + while (regs.r[3] != 0) { long long tmp; - switch (opcode.size) + switch (opcode->size) { case RX_Long: ma = mem_get_si (regs.r[1]); @@ -1124,6 +1653,22 @@ decode_opcode () set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O); else set_flags (FLAGBIT_O|FLAGBIT_S, ma); +#ifdef CYCLE_ACCURATE + switch (opcode->size) + { + case RX_Long: + cycles (6 + 4 * tx); + break; + case RX_Word: + cycles (6 + 5 * (tx / 2) + 4 * (tx % 2)); + break; + case RX_Byte: + cycles (6 + 7 * (tx / 4) + 4 * (tx % 4)); + break; + default: + abort (); + } +#endif break; case RXO_rolc: @@ -1133,6 +1678,7 @@ decode_opcode () v |= carry; set_szc (v, 4, ma); PD (v); + E1; break; case RXO_rorc: @@ -1142,6 +1688,7 @@ decode_opcode () uma |= (carry ? 0x80000000UL : 0); set_szc (uma, 4, mb); PD (uma); + E1; break; case RXO_rotl: @@ -1154,6 +1701,7 @@ decode_opcode () } set_szc (uma, 4, mb); PD (uma); + E1; break; case RXO_rotr: @@ -1166,6 +1714,7 @@ decode_opcode () } set_szc (uma, 4, mb); PD (uma); + E1; break; case RXO_round: @@ -1176,26 +1725,73 @@ decode_opcode () PD (mb); tprintf("(int) %g = %d\n", int2float(ma), mb); set_sz (mb, 4); + E (2); break; case RXO_rts: - regs.r_pc = poppc (); + { +#ifdef CYCLE_ACCURATE + int cyc = 5; +#endif + regs.r_pc = poppc (); +#ifdef CYCLE_ACCURATE + /* Note: specs say 5, chip says 3. */ + if (regs.fast_return && regs.link_register == regs.r_pc) + { +#ifdef CYCLE_STATS + fast_returns ++; +#endif + tprintf("fast return bonus\n"); + cyc -= 2; + } + cycles (cyc); + regs.fast_return = 0; + branch_alignment_penalty = 1; +#endif + } break; case RXO_rtsd: - if (opcode.op[2].type == RX_Operand_Register) + if (opcode->op[2].type == RX_Operand_Register) { int i; /* RTSD cannot pop R0 (sp). */ - put_reg (0, get_reg (0) + GS() - (opcode.op[0].reg-opcode.op[2].reg+1)*4); - if (opcode.op[2].reg == 0) + put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4); + if (opcode->op[2].reg == 0) EXCEPTION (EX_UNDEFINED); - for (i = opcode.op[2].reg; i <= opcode.op[0].reg; i ++) - put_reg (i, pop ()); +#ifdef CYCLE_ACCURATE + tx = opcode->op[0].reg - opcode->op[2].reg + 1; +#endif + for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++) + { + RLD (i); + put_reg (i, pop ()); + } + } + else + { +#ifdef CYCLE_ACCURATE + tx = 0; +#endif + put_reg (0, get_reg (0) + GS()); + } + put_reg (pc, poppc()); +#ifdef CYCLE_ACCURATE + if (regs.fast_return && regs.link_register == regs.r_pc) + { + tprintf("fast return bonus\n"); +#ifdef CYCLE_STATS + fast_returns ++; +#endif + cycles (tx < 3 ? 3 : tx + 1); } else - put_reg (0, get_reg (0) + GS()); - put_reg (pc, poppc ()); + { + cycles (tx < 5 ? 5 : tx + 1); + } + regs.fast_return = 0; + branch_alignment_penalty = 1; +#endif break; case RXO_sat: @@ -1203,6 +1799,7 @@ decode_opcode () PD (0x7fffffffUL); else if (FLAG_O && ! FLAG_S) PD (0x80000000UL); + E1; break; case RXO_sbb: @@ -1214,9 +1811,13 @@ decode_opcode () PD (1); else PD (0); + E1; break; case RXO_scmpu: +#ifdef CYCLE_ACCURATE + tx = regs.r[3]; +#endif while (regs.r[3] != 0) { uma = mem_get_qi (regs.r[1] ++); @@ -1229,36 +1830,56 @@ decode_opcode () set_zc (1, 1); else set_zc (0, ((int)uma - (int)umb) >= 0); + cycles (2 + 4 * (tx / 4) + 4 * (tx % 4)); break; case RXO_setpsw: - v = 1 << opcode.op[0].reg; + v = 1 << opcode->op[0].reg; if (FLAG_PM && (v == FLAGBIT_I || v == FLAGBIT_U)) break; regs.r_psw |= v; + cycles (1); break; case RXO_smovb: + RL (3); +#ifdef CYCLE_ACCURATE + tx = regs.r[3]; +#endif while (regs.r[3]) { uma = mem_get_qi (regs.r[2] --); mem_put_qi (regs.r[1]--, uma); regs.r[3] --; } +#ifdef CYCLE_ACCURATE + if (tx > 3) + cycles (6 + 3 * (tx / 4) + 3 * (tx % 4)); + else + cycles (2 + 3 * (tx % 4)); +#endif break; case RXO_smovf: + RL (3); +#ifdef CYCLE_ACCURATE + tx = regs.r[3]; +#endif while (regs.r[3]) { uma = mem_get_qi (regs.r[2] ++); mem_put_qi (regs.r[1]++, uma); regs.r[3] --; } + cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4)); break; case RXO_smovu: +#ifdef CYCLE_ACCURATE + tx = regs.r[3]; +#endif while (regs.r[3] != 0) { uma = mem_get_qi (regs.r[2] ++); @@ -1267,22 +1888,30 @@ decode_opcode () if (uma == 0) break; } + cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4)); break; case RXO_shar: /* d = ma >> mb */ SHIFT_OP (sll, int, mb, >>=, 1); + E (1); break; case RXO_shll: /* d = ma << mb */ SHIFT_OP (ll, int, mb, <<=, 0x80000000UL); + E (1); break; case RXO_shlr: /* d = ma >> mb */ SHIFT_OP (ll, unsigned int, mb, >>=, 1); + E (1); break; case RXO_sstr: - switch (opcode.size) + RL (3); +#ifdef CYCLE_ACCURATE + tx = regs.r[3]; +#endif + switch (opcode->size) { case RX_Long: while (regs.r[3] != 0) @@ -1291,6 +1920,7 @@ decode_opcode () regs.r[1] += 4; regs.r[3] --; } + cycles (2 + tx); break; case RX_Word: while (regs.r[3] != 0) @@ -1299,6 +1929,7 @@ decode_opcode () regs.r[1] += 2; regs.r[3] --; } + cycles (2 + (int)(tx / 2) + tx % 2); break; case RX_Byte: while (regs.r[3] != 0) @@ -1307,6 +1938,7 @@ decode_opcode () regs.r[1] ++; regs.r[3] --; } + cycles (2 + (int)(tx / 4) + tx % 4); break; default: abort (); @@ -1316,21 +1948,29 @@ decode_opcode () case RXO_stcc: if (GS2()) PD (GS ()); + E1; break; case RXO_stop: PRIVILEDGED (); regs.r_psw |= FLAGBIT_I; - return RX_MAKE_STOPPED(0); + DO_RETURN (RX_MAKE_STOPPED(0)); case RXO_sub: MATH_OP (-, 0); break; case RXO_suntil: + RL(3); +#ifdef CYCLE_ACCURATE + tx = 0; +#endif if (regs.r[3] == 0) - break; - switch (opcode.size) + { + cycles (3); + break; + } + switch (opcode->size) { case RX_Long: uma = get_reg (2); @@ -1339,9 +1979,15 @@ decode_opcode () regs.r[3] --; umb = mem_get_si (get_reg (1)); regs.r[1] += 4; +#ifdef CYCLE_ACCURATE + tx ++; +#endif if (umb == uma) break; } +#ifdef CYCLE_ACCURATE + cycles (3 + 3 * tx); +#endif break; case RX_Word: uma = get_reg (2) & 0xffff; @@ -1350,9 +1996,15 @@ decode_opcode () regs.r[3] --; umb = mem_get_hi (get_reg (1)); regs.r[1] += 2; +#ifdef CYCLE_ACCURATE + tx ++; +#endif if (umb == uma) break; } +#ifdef CYCLE_ACCURATE + cycles (3 + 3 * (tx / 2) + 3 * (tx % 2)); +#endif break; case RX_Byte: uma = get_reg (2) & 0xff; @@ -1361,9 +2013,15 @@ decode_opcode () regs.r[3] --; umb = mem_get_qi (regs.r[1]); regs.r[1] += 1; +#ifdef CYCLE_ACCURATE + tx ++; +#endif if (umb == uma) break; } +#ifdef CYCLE_ACCURATE + cycles (3 + 3 * (tx / 4) + 3 * (tx % 4)); +#endif break; default: abort(); @@ -1375,9 +2033,13 @@ decode_opcode () break; case RXO_swhile: + RL(3); +#ifdef CYCLE_ACCURATE + tx = 0; +#endif if (regs.r[3] == 0) break; - switch (opcode.size) + switch (opcode->size) { case RX_Long: uma = get_reg (2); @@ -1386,9 +2048,15 @@ decode_opcode () regs.r[3] --; umb = mem_get_si (get_reg (1)); regs.r[1] += 4; +#ifdef CYCLE_ACCURATE + tx ++; +#endif if (umb != uma) break; } +#ifdef CYCLE_ACCURATE + cycles (3 + 3 * tx); +#endif break; case RX_Word: uma = get_reg (2) & 0xffff; @@ -1397,9 +2065,15 @@ decode_opcode () regs.r[3] --; umb = mem_get_hi (get_reg (1)); regs.r[1] += 2; +#ifdef CYCLE_ACCURATE + tx ++; +#endif if (umb != uma) break; } +#ifdef CYCLE_ACCURATE + cycles (3 + 3 * (tx / 2) + 3 * (tx % 2)); +#endif break; case RX_Byte: uma = get_reg (2) & 0xff; @@ -1408,9 +2082,15 @@ decode_opcode () regs.r[3] --; umb = mem_get_qi (regs.r[1]); regs.r[1] += 1; +#ifdef CYCLE_ACCURATE + tx ++; +#endif if (umb != uma) break; } +#ifdef CYCLE_ACCURATE + cycles (3 + 3 * (tx / 4) + 3 * (tx % 4)); +#endif break; default: abort(); @@ -1424,12 +2104,21 @@ decode_opcode () case RXO_wait: PRIVILEDGED (); regs.r_psw |= FLAGBIT_I; - return RX_MAKE_STOPPED(0); + DO_RETURN (RX_MAKE_STOPPED(0)); case RXO_xchg: +#ifdef CYCLE_ACCURATE + regs.m2m = 0; +#endif v = GS (); /* This is the memory operand, if any. */ PS (GD ()); /* and this may change the address register. */ PD (v); + E2; +#ifdef CYCLE_ACCURATE + /* all M cycles happen during xchg's cycles. */ + memory_dest = 0; + memory_source = 0; +#endif break; case RXO_xor: @@ -1440,5 +2129,122 @@ decode_opcode () EXCEPTION (EX_UNDEFINED); } +#ifdef CYCLE_ACCURATE + regs.m2m = 0; + if (memory_source) + regs.m2m |= M2M_SRC; + if (memory_dest) + regs.m2m |= M2M_DST; + + regs.rt = new_rt; + new_rt = -1; +#endif + +#ifdef CYCLE_STATS + if (prev_cycle_count == regs.cycle_count) + { + printf("Cycle count not updated! id %s\n", id_names[opcode->id]); + abort (); + } +#endif + +#ifdef CYCLE_STATS + if (running_benchmark) + { + int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type); + + + cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count; + times_per_id[opcode->id][omap] ++; + + times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++; + + prev_opcode_id = opcode->id; + po0 = omap; + } +#endif + return RX_MAKE_STEPPED (); } + +#ifdef CYCLE_STATS +void +reset_pipeline_stats (void) +{ + memset (cycles_per_id, 0, sizeof(cycles_per_id)); + memset (times_per_id, 0, sizeof(times_per_id)); + memory_stalls = 0; + register_stalls = 0; + branch_stalls = 0; + branch_alignment_stalls = 0; + fast_returns = 0; + memset (times_per_pair, 0, sizeof(times_per_pair)); + running_benchmark = 1; + + benchmark_start_cycle = regs.cycle_count; +} + +void +halt_pipeline_stats (void) +{ + running_benchmark = 0; + benchmark_end_cycle = regs.cycle_count; +} +#endif + +void +pipeline_stats (void) +{ +#ifdef CYCLE_STATS + int i, o1; + int p, p1; +#endif + +#ifdef CYCLE_ACCURATE + if (verbose == 1) + { + printf ("cycles: %llu\n", regs.cycle_count); + return; + } + + printf ("cycles: %13s\n", comma (regs.cycle_count)); +#endif + +#ifdef CYCLE_STATS + if (benchmark_start_cycle) + printf ("bmark: %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle)); + + printf("\n"); + for (i = 0; i < N_RXO; i++) + for (o1 = 0; o1 < N_MAP; o1 ++) + if (times_per_id[i][o1]) + printf("%13s %13s %7.2f %s %s\n", + comma (cycles_per_id[i][o1]), + comma (times_per_id[i][o1]), + (double)cycles_per_id[i][o1] / times_per_id[i][o1], + op_cache_string(o1), + id_names[i]+4); + + printf("\n"); + for (p = 0; p < N_RXO; p ++) + for (p1 = 0; p1 < N_MAP; p1 ++) + for (i = 0; i < N_RXO; i ++) + for (o1 = 0; o1 < N_MAP; o1 ++) + if (times_per_pair[p][p1][i][o1]) + { + printf("%13s %s %-9s -> %s %s\n", + comma (times_per_pair[p][p1][i][o1]), + op_cache_string(p1), + id_names[p]+4, + op_cache_string(o1), + id_names[i]+4); + } + + printf("\n"); + printf("%13s memory stalls\n", comma (memory_stalls)); + printf("%13s register stalls\n", comma (register_stalls)); + printf("%13s branches taken (non-return)\n", comma (branch_stalls)); + printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls)); + printf("%13s fast returns\n", comma (fast_returns)); +#endif +}