2 Skeleton for retropc emulator
4 Author : Takeda.Toshiya
14 #pragma warning( disable : 4244 )
18 #define M_E 2.71828182845904523536
22 //#define APU_CLOCK CPU_CLOCKS
25 /* Xep128: Minimalistic Enterprise-128 emulator with focus on "exotic" hardware
26 Copyright (C)2014,2015,2016 LGB (Gábor Lénárt) <lgblgblgb@gmail.com>
29 This program is free software; you can redistribute it and/or modify
30 it under the terms of the GNU General Public License as published by
31 the Free Software Foundation; either version 2 of the License, or
32 (at your option) any later version.
34 This program is distributed in the hope that it will be useful,
35 but WITHOUT ANY WARRANTY; without even the implied warranty of
36 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37 GNU General Public License for more details.
39 You should have received a copy of the GNU General Public License
40 along with this program; if not, write to the Free Software
41 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
49 /* From my JSep emulator:
51 * (C)2014 Gábor Lénárt LGB http://ep.lgb.hu/jsep/
52 * Part of my JavaScript based Enterprise-128 emulator ("JSep" aka "webemu").
53 * Am9511 "APU" FPU emulation, somewhat (ehhh, a lot!) incorrect
55 * Thanks to Povi for testing APU support.
57 * http://www.hartetechnologies.com/manuals/AMD/AMD%209511%20FPU.pdf
58 * http://www.joelowens.org/z80/am9511algorithms.pdf
59 * http://www.joelowens.org/z80/am9511fpmanual.pdf
61 * Major problems with my emulation:
63 * Precision: converts data between APU formats and JS numerics; a real Am9511 may give slightly different results in the case of floats.
64 * Timing: uses constant timings, real APU varies execution times depending on the operands.
65 * Stack content: real APU destroys some elements in case of some OPS other than TOS. This is not emulated.
66 * APU status: I am not always sure what status flags modified and how.
67 * Results: I am not always sure even about the result of ops. Eg: SMUL/SMUU, what happens on signed values, etc, result can be even WRONG.
68 * Usage: emulation always assumes Z80 will be stopped, no WAIT/SRV etc (so bit 7 of command does not count either)
69 * Cleanness: my code uses pop/push primitives which is often quite expensive, but the code is more compact and it's enough for a few MHz chip emulation in JS still :)
72 //static uint8_t _apu_stack[16];
73 //static int _apu_tos;
74 //static uint8_t _apu_status;
76 // Note: NEGARG, ZERODIV and LARGE are truly not independent: do not mix them, use only one! The others can be "mixed".
// Bit masks OR-ed into _apu_status by the functions in this file.
// LARGE (24) has the same bit pattern as NEGARG | ZERODIV (8 | 16), which is why those three cannot be combined.
78 #define _APU_F_CARRY 1
79 #define _APU_F_OVERFLOW 2
80 #define _APU_F_UNDERFLOW 4
81 #define _APU_F_NEGARG 8
82 #define _APU_F_ZERODIV 16
83 #define _APU_F_LARGE 24
84 #define _APU_F_ZERO 32
85 #define _APU_F_SIGN 64
86 #define _APU_F_BUSY 128 // the original Xep128 note said this bit is unused because the APU stops the Z80 while working; in this file apu_write_command() sets it and event_callback() clears it
// Reset handler of the APU: zero-fills every byte of the operand stack (_apu_stack).
88 void AM9511::apu_reset ( void )
92 memset(_apu_stack, 0, sizeof _apu_stack);
96 uint8_t AM9511::apu_read_status( void )
// Moves the top-of-stack index by n bytes (n may be negative).
// The 0xF mask wraps the index, so the stack behaves as a 16-byte ring.
102 void AM9511::_apu_move( int n)
104 _apu_tos = (_apu_tos + n) & 0xF;
// Returns the stack byte located `depth` positions below the top of stack without moving _apu_tos.
// depth 0 is the byte at _apu_tos; the index wraps with the 0xF mask, so a negative depth addresses bytes above the top.
108 uint8_t AM9511::_apu_look8(int depth)
110 return _apu_stack[(_apu_tos - depth) & 0xF];
// Byte-wide pop primitive.
// The value handed back is the byte at depth -1, i.e. index (_apu_tos + 1) & 0xF -
// presumably the slot that was the top of stack before the pointer was stepped back (confirm).
114 uint8_t AM9511::_apu_pop8()
117 return _apu_look8(-1);
121 uint8_t AM9511::apu_read_data()
// Byte-wide push primitive: stores `data` into the stack slot currently addressed by _apu_tos.
127 void AM9511::_apu_push8(uint8_t data)
130 //_apu_tos = (_apu_tos + 1) & 0xF;
131 _apu_stack[_apu_tos] = data; // will be truncated to byte (the parameter is a uint8_t)
135 void AM9511::apu_write_data(uint8_t data)
// Pops a 16-bit fixed-point operand and converts it to a signed int.
141 int AM9511::_apu_pop_fix16(void) {
142 int data = _apu_pop8() << 8; // the first popped byte becomes the high byte
144 if (data & 0x8000) data = data - 0x10000; // two's complement correction: bit 15 set means negative, so 0x8000..0xFFFF maps to -32768..-1
148 // push fix16 format, also updates the status (zero, sign, overflow)
// Flags are only ever OR-ed in here; nothing in this function clears a status bit.
149 void AM9511::_apu_push_fix16(int data) {
150 if (data == 0) _apu_status |= _APU_F_ZERO; // zero flag
152 _apu_status |= _APU_F_SIGN; // negative flag
153 data += 0x10000; // two's complement correction
// After the correction above, anything that still does not fit in 16 unsigned bits is out of range.
155 if (data > 0xFFFF || data < 0) _apu_status |= _APU_F_OVERFLOW; // overflow flag [WTF]
// High byte; _apu_push8() takes a uint8_t, so only the low 8 bits of the shifted value are stored.
157 _apu_push8(data >> 8);
// Pops a 32-bit fixed-point operand, most significant byte first, and returns it as a signed 64-bit value.
// NOTE(review): _apu_pop8() returns uint8_t, which is promoted to int before the shift. For a byte >= 0x80
// the `<< 24` shifts into the int sign bit (undefined before C++20) and the result sign-extends when widened
// to int64_t. Casting the popped byte to int64_t before shifting would avoid relying on that - confirm.
160 int64_t AM9511::_apu_pop_fix32(void) {
161 int64_t data = _apu_pop8() << 24;
162 data |= _apu_pop8() << 16;
163 data |= _apu_pop8() << 8;
165 if (data > 2147483647L) data = data - 4294967296L; // two's complement correction: values above the largest positive 32-bit number become negative
// Pushes `data` as a 32-bit fixed-point operand and ORs the zero, sign and overflow flags into _apu_status.
169 void AM9511::_apu_push_fix32(int64_t data) {
170 if (data == 0) _apu_status |= _APU_F_ZERO;
172 _apu_status |= _APU_F_SIGN;
// Out of range when the value does not fit in 32 unsigned bits at this point.
175 if (data > 4294967295UL || data < 0) _apu_status |= _APU_F_OVERFLOW;
// Bytes are pushed in ascending significance, so the most significant byte is stored last.
// Each call keeps only the low 8 bits of its argument (uint8_t parameter of _apu_push8).
177 _apu_push8(data >> 8);
178 _apu_push8(data >> 16);
179 _apu_push8(data >> 24);
182 /* Foreword for FLOAT handling: I use natural float (well, double ...)
183 * numeric format of C, using pop/push APU functions to convert from/to.
184 * This is kinda messy, and not bit-exact emulation of Am9511.
185 * Even my lame push/pop functions can be done much better!!
// Pops an APU-format float and converts it to a C double.
// The first popped byte holds the mantissa sign (bit 7) and a 7-bit exponent (bits 0-6);
// the following bytes are assembled into a 24-bit mantissa, most significant byte first.
189 double AM9511::_apu_pop_float()
191 int exp = _apu_pop8();
192 int data = _apu_pop8() << 16;
194 data |= _apu_pop8() << 8;
196 if (!(data & 0x800000)) return 0.0; // MSB of mantissa must be 1 always, _except_ for the value zero, where all bytes should be zero (including the MSB of mantissa)
197 if (exp & 128) data = -data; // bit 7 set: negative number
198 if (exp & 64) exp = (exp & 63) - 64; else exp &= 63; // bit 6 is the exponent sign: the 7-bit field decodes to -64..63
199 fdata = pow(2.0, exp) * ((double)data / 16777216.0); // mantissa is a fraction scaled by 2^24 (16777216)
200 //DEBUG("APU: float is internally pop'ed: %f" NL, fdata);
// Converts a C double to the APU float format, pushes it byte by byte and ORs the resulting
// condition flags (LARGE, ZERO, UNDERFLOW, OVERFLOW, SIGN) into _apu_status.
205 void AM9511::_apu_push_float(double data)
208 if (!my_isfinite(data)) { // this should be true for the whole condition if the argument is NaN or Infinity ...
209 _apu_push8(0); // bad result for NaN, but something should be there (_apu_move() would be better one to "rollback" the stack?!)
213 _apu_status |= _APU_F_LARGE;
216 if (data == 0) { // if value is zero, we handle it as a special case, as logarithm function would panic on value of zero.
221 _apu_status |= _APU_F_ZERO; // zero flag
224 neg = data < 0; // remember the sign of the value (bool)
// Scale the value by 2^exp, then turn the fraction into an integer mantissa scaled by 2^24.
227 data = data / pow(2.0, exp);
228 i = (data * 16777216.0);
230 // ehm, not normalized mantissa or such a problem?
235 _apu_status |= _APU_F_ZERO | _APU_F_UNDERFLOW; // since we handled zero case at the beginning, zero value here means the underflow-gap, I guess
239 _apu_status |= _APU_F_OVERFLOW;
240 } else if (exp < -64) {
241 //exp = -((-exp) & 63); // WRONG! TODO, FIXME, HELP, ETC :D
242 exp = ((64 + exp) & 63) | 64;
// NOTE(review): this branch handles an exponent that is too SMALL for the 7-bit field, yet it sets
// OVERFLOW; UNDERFLOW looks like the intended flag - confirm against the Am9511 manual.
243 _apu_status |= _APU_F_OVERFLOW;
244 } else if (exp < 0) {
// Negative exponent that fits: encode the low 6 bits and set bit 6 as the exponent sign.
245 exp = ((64 + exp) & 63) | 64;
249 _apu_status |= _APU_F_SIGN; // negative flag
251 //if (data && (!(data & 0x800000)))
252 // DEBUG("APU: warning: irregular manitssa: ", data);
253 // Pushing 8 bit bytes onto the APU stack
257 _apu_push8(exp); // this byte holds the exponent, and also the sign of the mantissa
258 //if (data == 0) _apu_status |= _APU_F_UNDERFLOW; // hmmm. zero case is handled at the beginning, so if it's zero we are in the underflow-gap of the format. or whatever :D
262 // set S and Z flags of status on TOS, interpreting it as fixed 16 format
// Flags are only OR-ed in; neither flag is cleared here.
263 void AM9511:: _apu_sz_fix16(void) {
264 if (_apu_look8(0) & 128) _apu_status |= _APU_F_SIGN; // bit 7 of the top byte is the sign
265 if (_apu_look8(0) + _apu_look8(1) == 0) _apu_status |= _APU_F_ZERO; // this testing method for zeroness works as apu_look8() gives back only unsigned bytes ...
// Sets the S and Z status flags from the top four stack bytes, read as a 32-bit fixed-point value.
// Flags are only OR-ed in; neither flag is cleared here.
267 void AM9511::_apu_sz_fix32(void) {
268 if (_apu_look8(0) & 128) _apu_status |= _APU_F_SIGN; // bit 7 of the top byte is the sign
269 if (_apu_look8(0) + _apu_look8(1) + _apu_look8(2) + _apu_look8(3) == 0) _apu_status |= _APU_F_ZERO; // sum of unsigned bytes is zero only when all four are zero
// Sets the S and Z status flags from the float on top of the stack.
// Flags are only OR-ed in; neither flag is cleared here.
271 void AM9511::_apu_sz_float(void) {
272 if (_apu_look8(0) & 128) _apu_status |= _APU_F_SIGN; // bit 7 of the top (exponent/sign) byte is the mantissa sign
273 if ((_apu_look8(1) & 128) == 0) _apu_status |= _APU_F_ZERO; // we use only a single bit to test the zeroness of a float: the MSB of the byte below the top
// Swaps the two stack bytes located at depths d1 and d2 below the top of stack.
// Indices wrap with the 0xF mask, the same addressing _apu_look8() uses.
277 void AM9511::_apu_xchg(int d1, int d2) {
278 uint8_t n = _apu_look8(d1); // keep the d1 byte while its slot is overwritten
279 _apu_stack[(_apu_tos - d1) & 0xF] = _apu_look8(d2);
280 _apu_stack[(_apu_tos - d2) & 0xF] = n;
// Copies the stack byte at depth `from` into the slot at depth `to` (both relative to the top of stack).
282 void AM9511::_apu_copy(int from, int to) {
283 _apu_stack[(_apu_tos - to) & 0xF] = _apu_look8(from);
287 /* Note, call of this function should be AFTER calling _apu_push* functions as those may set overflow flag we want to keep as cleared here ...
288 * I am still not sure about the difference of overflow and underflow, also not the over-/underflow and carry. For the second problem:
289 * it's said that the maximal (or minimal value) can be extended by the carry flag, so there are three cases basically: a number can
290 * be represented without overflow and carry; the number can be represented if carry is thought of as an extension of the result;
291 * and overflow, when the result can't be represented even with the result size extended by the carry bit. Hmm. But then, should
292 * carry be set in case of overflow, or not?
// Derives the carry and overflow flags from a full-precision result.
// val:   the arithmetic result before it was cut down to the operand width.
// limit: first positive magnitude that does not fit the signed operand (callers pass 0x8000 or 0x80000000L).
294 void AM9511::_apu_carry ( int64_t val, int64_t limit )
// Outside [-2*limit, 2*limit): not representable even with the carry bit as an extra bit.
296 if (val >= limit * 2 || val < -limit * 2) {
297 _apu_status |= _APU_F_OVERFLOW;
298 // should carry set here????????????????
299 _apu_status |= _APU_F_CARRY;
// Outside [-limit, limit) but within one extra bit: carry only.
300 } else if (val >= limit || val < -limit) {
301 _apu_status &= 255 - _APU_F_OVERFLOW; // drop an overflow flag that an earlier push may have set
302 _apu_status |= _APU_F_CARRY;
307 /* Note: most of the command emulation uses the fix32/fix16/float POP/PUSH functions.
308 * In some cases it's not the optimal solution (performance) but it's much simpler.
309 * However in case of floats it can cause some odd things, ie APU-float<->C-double conversion
310 * rounding problems on POP/PUSH ... Well, maybe I will deal with this in later versions,
311 * now the short solution ... */
// Executes one APU command byte: clears the status register, then dispatches on the low 7 bits of cmd.
312 void AM9511::apu_write_command ( uint8_t cmd )
317 //int _apu_tos_old = _apu_tos;
319 _apu_status = 0; // I am still not sure that ops according to spec which "do not affect a flag" means that it is UNCHANGED from the previous op, or simply zero and never set. Hmmm.
320 switch (cmd & 0x7F) { // note, SR (bit7) field of command is currently ignored!
321 /* --------------------------------------- */
322 /* ---- 16 bit fixed point operations ---- */
323 /* --------------------------------------- */
324 case 0x6C: // SADD: Add TOS to NOS. Result to NOS. Pop Stack.
325 i = _apu_pop_fix16() + _apu_pop_fix16(); // addition commutes, so the order of the two pops does not matter
327 _apu_carry(i, 0x8000); // carry/overflow are judged on the unclipped sum against the signed 16-bit limit
330 case 0x6D: // SSUB: Subtract TOS from NOS. Result to NOS. Pop Stack.
331 i = _apu_pop_fix16(); // TOS is popped first ...
332 i = _apu_pop_fix16() - i; // ... then NOS - TOS
334 _apu_carry(i, 0x8000);
337 case 0x6E: // SMUL: Multiply NOS by TOS. Lower result to NOS. Pop Stack.
338 i = _apu_pop_fix16() * _apu_pop_fix16();
342 case 0x76: // SMUU: Multiply NOS by TOS. Upper result to NOS. Pop Stack.
343 i = _apu_pop_fix16() * _apu_pop_fix16();
344 _apu_push_fix16(i >> 16); // upper 16 bits of the product
347 case 0x6F: // SDIV: Divide NOS by TOS. Result to NOS. Pop Stack.
348 i = _apu_pop_fix16(); // TOS
350 _apu_push_fix16(_apu_pop_fix16() / i);
352 } else { // TOS = 0, divide by zero error
353 // TOS = 0 case, APU simply puts old NOS as result, that is, leave the original NOS, which is now the TOS
354 _apu_status |= _APU_F_ZERODIV;
358 /* --------------------------------------- */
359 /* ---- 32 bit fixed point operations ---- */
360 /* --------------------------------------- */
// These cases mirror the 16-bit ones, using the fix32 pop/push helpers and the 0x80000000L limit.
361 case 0x2C: // DADD: Add TOS to NOS. Result to NOS. Pop Stack.
362 l = _apu_pop_fix32() + _apu_pop_fix32(); // addition commutes, so the order of the two pops does not matter
364 _apu_carry(l, 0x80000000L);
367 case 0x2D: // DSUB: Subtract TOS from NOS. Result to NOS. Pop Stack.
368 l = _apu_pop_fix32(); // TOS is popped first ...
369 l = _apu_pop_fix32() - l; // ... then NOS - TOS
371 _apu_carry(l, 0x80000000L);
374 case 0x2E: // DMUL: Multiply NOS by TOS. Lower result to NOS. Pop Stack.
375 l = _apu_pop_fix32() * _apu_pop_fix32(); // operands are int64_t, so the product is formed in 64 bits
379 case 0x36: // DMUU: Multiply NOS by TOS. Upper result to NOS. Pop Stack.
380 l = _apu_pop_fix32() * _apu_pop_fix32();
381 _apu_push_fix32(l >> 32); // upper 32 bits of the product
384 case 0x2F: // DDIV: Divide NOS by TOS. Result to NOS. Pop Stack.
385 l = _apu_pop_fix32(); // TOS
387 _apu_push_fix32(_apu_pop_fix32() / l);
389 } else { // TOS = 0, divide by zero error
390 // TOS = 0 case, APU simply puts old NOS as result, that is, leave the original NOS, which is now the TOS
391 _apu_status |= _APU_F_ZERODIV;
395 /* -------------------------------------------------- */
396 /* ---- 32 bit floating point primary operations ---- */
397 /* -------------------------------------------------- */
// All float cases go through _apu_pop_float()/_apu_push_float(), i.e. the arithmetic itself is done on C doubles.
398 case 0x10: // FADD: Add TOS to NOS. Result to NOS. Pop Stack.
399 f = _apu_pop_float();
400 _apu_push_float(_apu_pop_float() + f);
401 clocks = (f ? 200 : 24); // shorter timing when the popped TOS operand is zero
403 case 0x11: // FSUB: Subtract TOS from NOS. Result to NOS. Pop Stack.
404 f = _apu_pop_float();
405 _apu_push_float(_apu_pop_float() - f);
406 clocks = (f ? 200 : 26); // shorter timing when the popped TOS operand is zero
408 case 0x12: // FMUL: Multiply NOS by TOS. Result to NOS. Pop Stack.
409 _apu_push_float(_apu_pop_float() * _apu_pop_float());
412 case 0x13: // FDIV: Divide NOS by TOS. Result to NOS. Pop Stack.
413 f = _apu_pop_float();
415 _apu_push_float(_apu_pop_float() / f);
417 } else { // TOS = 0, divide by zero error
418 // TOS = 0 case, APU simply puts old NOS as result, that is, leave the original NOS, which is now the TOS
419 _apu_status |= _APU_F_ZERODIV;
423 /* -------------------------------------------------- */
424 /* ---- 32 bit floating point derived operations ---- */
425 /* -------------------------------------------------- */
// A non-finite result from any of these (NaN or infinity) is flagged as LARGE by _apu_push_float().
426 case 0x01: // SQRT: Square Root of TOS. Result to TOS.
427 f = _apu_pop_float();
428 _apu_push_float(sqrt(fabs(f))); // we still want to do something with negative number ..., so use fabs() but set the error status on the next line too
429 if (f < 0) _apu_status |= _APU_F_NEGARG; // negative argument signal
432 case 0x02: // SIN: Sine of TOS. Result to TOS.
433 _apu_push_float(sin(_apu_pop_float()));
436 case 0x03: // COS: Cosine of TOS. Result to TOS.
437 _apu_push_float(cos(_apu_pop_float()));
440 case 0x04: // TAN: Tangent of TOS. Result to TOS.
441 _apu_push_float(tan(_apu_pop_float()));
444 case 0x05: // ASIN: Inverse Sine of TOS. Result to TOS.
445 _apu_push_float(asin(_apu_pop_float()));
448 case 0x06: // ACOS: Inverse Cosine of TOS. Result to TOS.
449 _apu_push_float(acos(_apu_pop_float()));
452 case 0x07: // ATAN: Inverse Tangent of TOS. Result to TOS.
453 _apu_push_float(atan(_apu_pop_float()));
456 case 0x08: // LOG: Common Logarithm of TOS. Result to TOS.
457 f = _apu_pop_float();
459 _apu_push_float(log10(f));
462 _apu_status |= _APU_F_NEGARG;
467 case 0x09: // LN: Natural Logarithm of TOS. Result to TOS.
468 f = _apu_pop_float();
470 _apu_push_float(log(f));
473 _apu_status |= _APU_F_NEGARG;
478 case 0x0A: // EXP: "e" raised to power in TOS. Result to TOS.
479 f = _apu_pop_float();
480 _apu_push_float(pow(M_E, f));
481 clocks = (f > 32 ? 34 : 4000); // 34 clocks for arguments above 32, 4000 otherwise
483 case 0x0B: // PWR: NOS raised to power in TOS. Result to TOS. Pop Stack.
484 f = _apu_pop_float(); // TOS is the exponent ...
485 _apu_push_float(pow(_apu_pop_float(), f)); // ... NOS is the base
488 /* ------------------------------------------------ */
489 /* ---- data and stack manipulation operations ---- */
490 /* ------------------------------------------------ */
491 case 0x00: // NOP: does nothing (but clears status, however it's the first instruction done in the main func already)
// NOTE(review): in FIXS/FIXD the popped double is implicitly converted to the integer parameter of the
// push helper (truncation toward zero); the conversion itself is not range-checked here.
495 case 0x1F: // FIXS: Convert TOS from floating point format to fixed point format (16 bit).
496 _apu_push_fix16(_apu_pop_float());
499 case 0x1E: // FIXD: Convert TOS from floating point format to fixed point format (32 bit).
500 _apu_push_fix32(_apu_pop_float());
503 case 0x1D: // FLTS: Convert TOS from fixed point format (16 bit) to floating point format.
504 _apu_push_float(_apu_pop_fix16());
507 case 0x1C: // FLTD: Convert TOS from fixed point format (32 bit) to floating point format.
508 _apu_push_float(_apu_pop_fix32());
512 case 0x74: // CHSS: Change sign of fixed point (16 bit) operand on TOS.
513 _apu_push_fix16(-_apu_pop_fix16());
516 case 0x34: // CHSD: Change sign of fixed point (32 bit) operand on TOS.
517 _apu_push_fix32(-_apu_pop_fix32());
520 case 0x15: // CHSF: Change sign of floating point operand on TOS. Note: that does not seem to be a big issue, as a single bit should be modified??
521 if (_apu_look8(1) & 128) { // if number is not zero
522 _apu_stack[_apu_tos] ^= 128; // flip the sign bit in place, without a pop/push round trip
523 if (_apu_stack[_apu_tos] & 128) _apu_status |= _APU_F_SIGN;
524 } else // if number is zero, nothing happens (but we set the zero flag)
525 _apu_status |= _APU_F_ZERO;
530 case 0x77: // PTOS: Push stack. Duplicate NOS to TOS.
537 case 0x37: // PTOD: Push stack. Duplicate NOS to TOS.
546 case 0x17: // PTOF: Push stack. Duplicate NOS to TOS.
556 case 0x78: // POPS: Pop stack. Old NOS becomes new TOS, old TOS rotates to bottom.
558 _apu_sz_fix16(); // set "sz" (S and Z status flags) by inspecting (new) TOS
561 case 0x38: // POPD: Pop stack. Old NOS becomes new TOS, old TOS rotates to bottom.
566 case 0x18: // POPF: Pop stack. Old NOS becomes new TOS, old TOS rotates to bottom.
572 case 0x79: // XCHS: Exchange NOS and TOS. (16 bit fixed)
578 case 0x39: // XCHD: Exchange NOS and TOS. (32 bit fixed)
586 case 0x19: // XCHF: Exchange NOS and TOS. (float stuff)
595 case 0x1A: // PUPI: Push floating point constant PI onto TOS. Previous TOS becomes NOS.
604 // DEBUG("APU: not implemented/unknown Am9511 command: %02Xh" NL, cmd);
605 clocks = 4; // no idea what happens.
608 // clocks *= CPU_CLOCK;
609 // z80ex_w_states((clocks % APU_CLOCK) ? ((clocks / APU_CLOCK) + 1) : (clocks / APU_CLOCK));
// Completion timing: a still-pending completion event (register_id != -1) is cancelled before a new one is armed.
613 if(register_id != -1) {
614 cancel_event(this, register_id);
// Arm the completion event after `clocks` APU cycles. The delay is clocks * 1000000.0 / _APU_CLOCK,
// presumably microseconds with _APU_CLOCK in Hz - confirm against register_event().
// The last argument must be the ADDRESS of register_id so the new event id can be stored there;
// the previous text "®ister_id" was a mis-encoded "&register_id" (HTML "&reg" entity artefact).
616 register_event(this, 0, (1000000.0 * clocks) / _APU_CLOCK, false, &register_id);
// Report the APU as busy until event_callback() clears the flag again.
618 _apu_status |= _APU_F_BUSY;
// Device initialisation: runs the base-class initialisation, then picks the APU clock used for command timing.
622 void AM9511::initialize()
624 DEVICE::initialize();
// Prefer an explicit APU_CLOCK feature value when the OSD layer reports one.
625 if(osd->check_feature(_T("APU_CLOCK"))) {
626 _APU_CLOCK = (double)osd->get_feature_int_value(_T("APU_CLOCK"));
// Presumably the fallback branch: use the CPU_CLOCKS feature value instead (confirm).
628 _APU_CLOCK = (double)osd->get_feature_int_value(_T("CPU_CLOCKS"));
// I/O write entry point: forwards the written byte either to the command handler or to the data handler.
// Which one is used presumably depends on addr (confirm). The BUSY checks are commented out,
// so writes are not gated on the busy flag.
638 void AM9511::write_io8(uint32_t addr, uint32_t data)
641 // if(!(_apu_status & _APU_F_BUSY)) {
642 apu_write_command(data); // data is narrowed to the uint8_t parameter
645 // if(!(_apu_status & _APU_F_BUSY)) {
646 apu_write_data(data); // data is narrowed to the uint8_t parameter
// I/O read entry point: returns either the status register or a data byte.
// Which one is returned presumably depends on addr (confirm). The BUSY check on the data path
// is commented out, so data reads are not gated on the busy flag.
651 uint32_t AM9511::read_io8(uint32_t addr)
654 return apu_read_status();
656 // if(!(_apu_status & _APU_F_BUSY)) {
657 return apu_read_data();
// Event handler for the completion event armed by apu_write_command(): clears the BUSY status bit.
663 void AM9511::event_callback(int event_id, int err)
667 _apu_status &= ~_APU_F_BUSY;
// Version tag checked at the start of the state data.
670 #define STATE_VERSION 1
// Saves or restores the device state through state_fio.
// NOTE(review): presumably `loading` selects the direction and the two checks below make the function
// return false on a mismatch - confirm.
672 bool AM9511::process_state(FILEIO* state_fio, bool loading)
674 if(!state_fio->StateCheckUint32(STATE_VERSION)) {
677 if(!state_fio->StateCheckInt32(this_device_id)) {
// Persisted fields: the whole operand stack, the top-of-stack index, the status register
// and the id of the pending completion event.
680 state_fio->StateBuffer(_apu_stack, sizeof(_apu_stack), 1);
681 state_fio->StateInt32(_apu_tos);
682 state_fio->StateUint8(_apu_status);
683 state_fio->StateInt32(register_id);