source/src/vm/am9511.cpp

   1 /*
   2         Skelton for retropc emulator
   3
   4         Author : Takeda.Toshiya
   5         Origin : Xep128
   6         Date   : 2017.05.09-
   7
   8         [ AM9511 ]
   9 */
  10
  11 #include "am9511.h"
  12
  13 #ifdef _MSC_VER
  14         #pragma warning( disable : 4244 )
  15 #endif
  16
  17 #ifndef M_E
  18 #define M_E 2.71828182845904523536
  19 #endif
  20
  21 //#ifndef APU_CLOCK
  22 //#define APU_CLOCK CPU_CLOCKS
  23 //#endif
  24
  25 /* Xep128: Minimalistic Enterprise-128 emulator with focus on "exotic" hardware
  26    Copyright (C)2014,2015,2016 LGB (Gábor Lénárt) <lgblgblgb@gmail.com>
  27    http://xep128.lgb.hu/
  28
  29 This program is free software; you can redistribute it and/or modify
  30 it under the terms of the GNU General Public License as published by
  31 the Free Software Foundation; either version 2 of the License, or
  32 (at your option) any later version.
  33
  34 This program is distributed in the hope that it will be useful,
  35 but WITHOUT ANY WARRANTY; without even the implied warranty of
  36 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  37 GNU General Public License for more details.
  38
  39 You should have received a copy of the GNU General Public License
  40 along with this program; if not, write to the Free Software
  41 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
  42
  43 //#include "xep128.h"
  44 //#include "apu.h"
  45 //#include "cpu.h"
  46
  47 #include <math.h>
  48
  49 /* From my JSep emulator:
  50  *
  51  * (C)2014 Gábor Lénárt LGB http://ep.lgb.hu/jsep/
  52  * Part of my JavaScript based Enterprise-128 emulator ("JSep" aka "webemu").
  53  * Am9511 "APU" FPU emulation, somewhat (ehhh, a lot!) incorrect
  54  *
  55  * Thanks to Povi for testing APU support.
  56  *
  57  * http://www.hartetechnologies.com/manuals/AMD/AMD%209511%20FPU.pdf
  58  * http://www.joelowens.org/z80/am9511algorithms.pdf
  59  * http://www.joelowens.org/z80/am9511fpmanual.pdf
  60  *
  61  * Major problems with my emulation:
  62  *
 * Precision: converts data between APU formats and JS numeric, real Am9511 may give slightly different results in case of floats.
  64  * Timing: uses constant timings, real APU varies execution times depending on the operands.
  65  * Stack content: real APU destroys some elements in case of some OPS other than TOS. This is not emulated.
  66  * APU status: I am not always sure what status flags modified and how.
  67  * Results: I am not always sure even about the result of ops. Eg: SMUL/SMUU, what happens on signed values, etc, result can be even WRONG.
  68  * Usage: emulation always assumes Z80 will be stopped, no WAIT/SRV etc (so bit 7 of command does not count either)
  69  * Cleanness: my code uses pop/push primitives which is often quite expensive, but the code is more compact and it's enough for a few MHz chip emulation in JS still :)
  70  */
  71
  72 //static uint8_t _apu_stack[16];
  73 //static int _apu_tos;
  74 //static uint8_t _apu_status;
  75
// Bit masks of the APU status byte (_apu_status).
// Note: NEGARG, ZERODIV and LARGE are not truly independent: LARGE (24) is the combination of the
// NEGARG (8) and ZERODIV (16) bits, so you should not mix them, but report only one of the three!
// The other flags can be "mixed".

#define _APU_F_CARRY             1
#define _APU_F_OVERFLOW          2
#define _APU_F_UNDERFLOW         4
#define _APU_F_NEGARG            8
#define _APU_F_ZERODIV          16
#define _APU_F_LARGE            24
#define _APU_F_ZERO             32
#define _APU_F_SIGN             64
#define _APU_F_BUSY             128 // set at the end of apu_write_command() while the command's delay event is pending, cleared again in event_callback()
  87
  88 void AM9511::apu_reset ( void )
  89 {
  90         _apu_status = 0;
  91         _apu_tos = 0;
  92         memset(_apu_stack, 0, sizeof _apu_stack);
  93 }
  94
  95
  96 uint8_t AM9511::apu_read_status( void )
  97 {
  98         return _apu_status;
  99 }
 100
 101
 102 void AM9511::_apu_move( int n)
 103 {
 104         _apu_tos = (_apu_tos + n) & 0xF;
 105 }
 106
 107
 108 uint8_t AM9511::_apu_look8(int depth)
 109 {
 110         return _apu_stack[(_apu_tos - depth) & 0xF];
 111 }
 112
 113
 114 uint8_t AM9511::_apu_pop8()
 115 {
 116         _apu_move(-1);
 117         return _apu_look8(-1);
 118 }
 119
 120
 121 uint8_t AM9511::apu_read_data()
 122 {
 123         return _apu_pop8();
 124 }
 125
 126
 127 void AM9511::_apu_push8(uint8_t data)
 128 {
 129         _apu_move(1);
 130         //_apu_tos = (_apu_tos + 1) & 0xF;
 131         _apu_stack[_apu_tos] = data; // will be trucated to byte
 132 }
 133
 134
 135 void AM9511::apu_write_data(uint8_t data)
 136 {
 137         _apu_push8(data);
 138 }
 139
 140
 141 int  AM9511::_apu_pop_fix16(void) {
 142         int data = _apu_pop8() << 8;
 143         data |= _apu_pop8();
 144         if (data & 0x8000) data = data - 0x10000; // two's complement correction
 145         return data;
 146 }
 147
 148 // push fix16 format, also updates the status (zero, sign, overflow)
 149 void AM9511::_apu_push_fix16(int data) {
 150         if (data == 0) _apu_status |= _APU_F_ZERO; // zero flag
 151         else if (data < 0) {
 152                 _apu_status |= _APU_F_SIGN; // negative flag
 153                 data += 0x10000; // two's complement correction
 154         }
 155         if (data > 0xFFFF || data < 0) _apu_status |= _APU_F_OVERFLOW; // overflow flag [WTF]
 156         _apu_push8(data);
 157         _apu_push8(data >> 8);
 158 }
 159
 160 int64_t AM9511::_apu_pop_fix32(void) {
 161         int64_t data = _apu_pop8() << 24;
 162         data |= _apu_pop8() << 16;
 163         data |= _apu_pop8() << 8;
 164         data |= _apu_pop8();
 165         if (data > 2147483647L) data = data - 4294967296L; // two's complement correction
 166         return data;
 167 }
 168
 169 void AM9511::_apu_push_fix32(int64_t data) {
 170         if (data == 0) _apu_status |= _APU_F_ZERO;
 171         else if (data < 0) {
 172                 _apu_status |= _APU_F_SIGN;
 173                 data += 4294967296L;
 174         }
 175         if (data > 4294967295UL || data < 0) _apu_status |= _APU_F_OVERFLOW;
 176         _apu_push8(data);
 177         _apu_push8(data >> 8);
 178         _apu_push8(data >> 16);
 179         _apu_push8(data >> 24);
 180 }
 181
/* Foreword for FLOAT handling: I use the natural float (well, double ...)
 * numeric format of C, using pop/push APU functions to convert from/to.
 * This is kinda messy, and not a bit-exact emulation of Am9511.
 * Even my lame push/pop functions can be done much better!!
 */
 187
 188
 189 double AM9511::_apu_pop_float()
 190 {
 191         int exp = _apu_pop8();
 192         int data = _apu_pop8() << 16;
 193         double fdata;
 194         data |= _apu_pop8() << 8;
 195         data |= _apu_pop8();
 196         if (!(data & 0x800000)) return 0.0; // MSB of mantissa must be 1 always, _except_ for the value zero, where all bytes should be zero (including the MSB of mantissa)
 197         if (exp & 128) data = -data;
 198         if (exp & 64) exp = (exp & 63) - 64; else exp &= 63;
 199         fdata = pow(2.0, exp) * ((double)data / 16777216.0);
 200         //DEBUG("APU: float is internally pop'ed: %f" NL, fdata);
 201         return fdata;
 202 }
 203
 204
 205 void AM9511::_apu_push_float(double data)
 206 {
 207         int neg, exp , i;
 208         if (!my_isfinite(data)) { // this should be true for the whole condition of argument is NaN of Infinity ...
 209                 _apu_push8(0); // bad result for NaN, but something should be there (_apu_move() would be better one to "rollback" the stack?!)
 210                 _apu_push8(0);
 211                 _apu_push8(0);
 212                 _apu_push8(0);
 213                 _apu_status |= _APU_F_LARGE;
 214                 return;
 215         }
 216         if (data == 0) { // if value is zero, we handle it as a special case, as logarithm function would panic on value of zero.
 217                 _apu_push8(0);
 218                 _apu_push8(0);
 219                 _apu_push8(0);
 220                 _apu_push8(0);
 221                 _apu_status |= _APU_F_ZERO; // zero flag
 222                 return;
 223         }
 224         neg = data < 0; // remember the sign of the value (bool)
 225         data = fabs(data);
 226         exp = my_log2(data);
 227         data = data / pow(2.0, exp);
 228         i = (data * 16777216.0);
 229         if (i >= 16777216) {
 230                 // ehm, not normalized mantissa or such a problem?
 231                 i >>= 1;
 232                 exp++;
 233         } else if (i == 0) {
 234                 exp = 0;
 235                 _apu_status |= _APU_F_ZERO | _APU_F_UNDERFLOW; // since we handled zero case at the begining, zero value here means the underflow-gap, I guess
 236         }
 237         if (exp > 63) {
 238                 exp &= 63;
 239                 _apu_status |= _APU_F_OVERFLOW;
 240         } else if (exp < -64) {
 241                 //exp = -((-exp) & 63); // WRONG! TODO, FIXME, HELP, ETC :D
 242                 exp = ((64 + exp) & 63) | 64;
 243                 _apu_status |= _APU_F_OVERFLOW;
 244         } else if (exp < 0) {
 245                 exp = ((64 + exp) & 63) | 64;
 246         }
 247         if (neg) {
 248                 exp |= 128;
 249                 _apu_status |= _APU_F_SIGN; // negative flag
 250         }
 251         //if (data && (!(data & 0x800000)))
 252         //      DEBUG("APU: warning: irregular manitssa: ", data);
 253         // Pushing 8 bit bytes onto the APU stack
 254         _apu_push8(i);
 255         _apu_push8(i >> 8);
 256         _apu_push8(i >> 16);
 257         _apu_push8(exp); // this byte holds the exponent, and also the sign of the mantissa
 258         //if (data == 0) _apu_status |= _APU_F_UNDERFLOW; // hmmm. zero case is handled at the beginning, so if it's zero we are in the underflow-gap of the format. or whatever :D
 259 }
 260
 261
 262 // set S and Z flags of status on TOS, interpreting it as fixed 16 format
 263 void AM9511:: _apu_sz_fix16(void) {
 264         if (_apu_look8(0) & 128) _apu_status |= _APU_F_SIGN;
 265         if (_apu_look8(0) + _apu_look8(1) == 0) _apu_status |= _APU_F_ZERO; // this testing method for zeroness works as apu_look8() gives back only unsigned bytes ...
 266 }
 267 void AM9511::_apu_sz_fix32(void) {
 268         if (_apu_look8(0) & 128) _apu_status |= _APU_F_SIGN;
 269         if (_apu_look8(0) + _apu_look8(1) + _apu_look8(2) + _apu_look8(3) == 0) _apu_status |= _APU_F_ZERO;
 270 }
 271 void AM9511::_apu_sz_float(void) {
 272         if (_apu_look8(0) & 128) _apu_status |= _APU_F_SIGN;
 273         if ((_apu_look8(1) & 128) == 0) _apu_status |= _APU_F_ZERO; // we use only a single bit to test the zeroness of a float.
 274 }
 275
 276
 277 void AM9511::_apu_xchg(int d1, int d2) {
 278         uint8_t n = _apu_look8(d1);
 279         _apu_stack[(_apu_tos - d1) & 0xF] = _apu_look8(d2);
 280         _apu_stack[(_apu_tos - d2) & 0xF] = n;
 281 }
 282 void AM9511::_apu_copy(int from, int to) {
 283         _apu_stack[(_apu_tos - to) & 0xF] = _apu_look8(from);
 284 }
 285
 286
 287 /* Note, call of this function should be AFTER calling _apu_push* functions as those may set overflow flag we want to keep as cleared here ...
 288  * I am still not sure about the difference of overflow and underflow, also not the over-/underflow and carry. For the second problem:
 289  * it's said that the maximal (or minimal value) can be extended by the carry flag, so there are three cases basically: a number can
 290  * be represented without overflow and carry, the number can be represented as carry to be thought of the extension of the result,
 291  * and the overflow, when the result can't represented even with the extended result size by the carry bit. Hmmm. But then, should
 292  * carry to be set in case of overflow, or not?
 293  * */
 294 void AM9511::_apu_carry ( int64_t val, int64_t limit )
 295 {
 296         if (val >= limit * 2 || val < -limit * 2) {
 297                 _apu_status |= _APU_F_OVERFLOW;
 298                 // should carry set here????????????????
 299                 _apu_status |= _APU_F_CARRY;
 300         } else if (val >= limit || val < -limit) {
 301                 _apu_status &= 255 - _APU_F_OVERFLOW;
 302                 _apu_status |= _APU_F_CARRY;
 303         }
 304 }
 305
 306
 307 /* Note: most of the command emulation uses the fix32/fix16/float POP/PUSH functions.
 308  * In some cases it's not the optimal solution (performance) but it's much simplier.
 309  * However in case of floats it can cause some odd things, ie APU-float<->C-double conversion
 310  * rounding problems on POP/PUSH ... Well maybe I will deal with this later versions,
 311  * now the short solution ... */
 312 void AM9511::apu_write_command ( uint8_t cmd )
 313 {
 314         int i;
 315         int64_t l;
 316         double f;
 317         //int _apu_tos_old = _apu_tos;
 318         int clocks = 0;
 319         _apu_status = 0; // I am still not sure that ops according to spec which "do not affect a flag" means that it is UNCHANGED from the previous op, or simply zero and never set. Hmmm.
 320         switch (cmd & 0x7F) { // note, SR (bit7) field of command is currently ignored!
 321                 /* --------------------------------------- */
 322                 /* ---- 16 bit fixed point operations ---- */
 323                 /* --------------------------------------- */
 324                 case 0x6C: // SADD: Add TOS to NOS. Result to NOS. Pop Stack.
 325                         i = _apu_pop_fix16() + _apu_pop_fix16();
 326                         _apu_push_fix16(i);
 327                         _apu_carry(i, 0x8000);
 328                         clocks = 17;
 329                         break;
 330                 case 0x6D: // SSUB: Substract TOS from NOS. Result to NOS. Pop Stack.
 331                         i = _apu_pop_fix16();
 332                         i = _apu_pop_fix16() - i;
 333                         _apu_push_fix16(i);
 334                         _apu_carry(i, 0x8000);
 335                         clocks = 31;
 336                         break;
 337                 case 0x6E: // SMUL: Multiply NOS by TOS. Lower result to NOS. Pop Stack.
 338                         i = _apu_pop_fix16() * _apu_pop_fix16();
 339                         _apu_push_fix16(i);
 340                         clocks = 89;
 341                         break;
 342                 case 0x76: // SMUU: Multiply NOS by TOS. Upper result to NOS. Pop Stack.
 343                         i = _apu_pop_fix16() * _apu_pop_fix16();
 344                         _apu_push_fix16(i >> 16);
 345                         clocks = 87;
 346                         break;
 347                 case 0x6F: // SDIV: Divide NOS by TOS. Result to NOS. Pop Stack.
 348                         i = _apu_pop_fix16(); // TOS
 349                         if (i) {
 350                                 _apu_push_fix16(_apu_pop_fix16() / i);
 351                                 clocks = 89;
 352                         } else { // TOS = 0, divide by zero error
 353                                 // TOS = 0 case, APU simply puts old NOS as result, that is, leave the original NOS, which is now the TOS
 354                                 _apu_status |= _APU_F_ZERODIV;
 355                                 clocks = 14;
 356                         }
 357                         break;
 358                 /* --------------------------------------- */
 359                 /* ---- 32 bit fixed point operations ---- */
 360                 /* --------------------------------------- */
 361                 case 0x2C: // DADD: Add TOS to NOS. Result to NOS. Pop Stack.
 362                         l = _apu_pop_fix32() + _apu_pop_fix32();
 363                         _apu_push_fix32(l);
 364                         _apu_carry(l, 0x80000000L);
 365                         clocks = 21;
 366                         break;
 367                 case 0x2D: // DSUB: Substract TOS from NOS. Result to NOS. Pop Stack.
 368                         l = _apu_pop_fix32();
 369                         l = _apu_pop_fix32() - l;
 370                         _apu_push_fix32(l);
 371                         _apu_carry(l, 0x80000000L);
 372                         clocks = 39;
 373                         break;
 374                 case 0x2E: // DMUL: Multiply NOS by TOS. Lower result to NOS. Pop Stack.
 375                         l = _apu_pop_fix32() * _apu_pop_fix32();
 376                         _apu_push_fix32(l);
 377                         clocks = 200;
 378                         break;
 379                 case 0x36: // DMUU: Multiply NOS by TOS. Upper result to NOS. Pop Stack.
 380                         l = _apu_pop_fix32() * _apu_pop_fix32();
 381                         _apu_push_fix32(l >> 32);
 382                         clocks = 200;
 383                         break;
 384                 case 0x2F: // DDIV: Divide NOS by TOS. Result to NOS. Pop Stack.
 385                         l = _apu_pop_fix32(); // TOS
 386                         if (l) {
 387                                 _apu_push_fix32(_apu_pop_fix32() / l);
 388                                 clocks = 200;
 389                         } else { // TOS = 0, divide by zero error
 390                                 // TOS = 0 case, APU simply puts old NOS as result, that is, leave the original NOS, which is now the TOS
 391                                 _apu_status |= _APU_F_ZERODIV;
 392                                 clocks = 18;
 393                         }
 394                         break;
 395                 /* -------------------------------------------------- */
 396                 /* ---- 32 bit floating point primary operations ---- */
 397                 /* -------------------------------------------------- */
 398                 case 0x10: // FADD: Add TOS to NOS. Result to NOS. Pop Stack.
 399                         f = _apu_pop_float();
 400                         _apu_push_float(_apu_pop_float() + f);
 401                         clocks = (f ? 200 : 24);
 402                         break;
 403                 case 0x11: // FSUB: Substract TOS from NOS. Result to NOS. Pop Stack.
 404                         f = _apu_pop_float();
 405                         _apu_push_float(_apu_pop_float() - f);
 406                         clocks = (f ? 200 : 26);
 407                         break;
 408                 case 0x12: // FMUL: Multiply NOS by TOS. Result to NOS. Pop Stack.
 409                         _apu_push_float(_apu_pop_float() * _apu_pop_float());
 410                         clocks = 150;
 411                         break;
 412                 case 0x13: // FDIV: Divide NOS by TOS. Result to NOS. Pop Stack.
 413                         f = _apu_pop_float();
 414                         if (f) {
 415                                 _apu_push_float(_apu_pop_float() / f);
 416                                 clocks = 170;
 417                         } else { // TOS = 0, divide by zero error
 418                                 // TOS = 0 case, APU simply puts old NOS as result, that is, leave the original NOS, which is now the TOS
 419                                 _apu_status |= _APU_F_ZERODIV;
 420                                 clocks = 22;
 421                         }
 422                         break;
 423                 /* -------------------------------------------------- */
 424                 /* ---- 32 bit floating point derived operations ---- */
 425                 /* -------------------------------------------------- */
 426                 case 0x01: // SQRT: Square Root of TOS. Result to TOS.
 427                         f = _apu_pop_float();
 428                         _apu_push_float(sqrt(fabs(f))); // we still want to do something with negative number ..., so use fabs() but set the error status on the next line too
 429                         if (f < 0) _apu_status |= _APU_F_NEGARG; // negative argument signal
 430                         clocks = 800;
 431                         break;
 432                 case 0x02: // SIN: Sine of TOS. Result to TOS.
 433                         _apu_push_float(sin(_apu_pop_float()));
 434                         clocks = 4000;
 435                         break;
 436                 case 0x03: // COS: Cosine of TOS. Result to TOS.
 437                         _apu_push_float(cos(_apu_pop_float()));
 438                         clocks = 4000;
 439                         break;
 440                 case 0x04: // TAN: Tangent of TOS. Result to TOS.
 441                         _apu_push_float(tan(_apu_pop_float()));
 442                         clocks = 5000;
 443                         break;
 444                 case 0x05: // ASIN: Inverse Sine of TOS. Result to TOS.
 445                         _apu_push_float(asin(_apu_pop_float()));
 446                         clocks = 7000;
 447                         break;
 448                 case 0x06: // ACOS: Inverse Cosine of TOS. Result to TOS.
 449                         _apu_push_float(acos(_apu_pop_float()));
 450                         clocks = 7000;
 451                         break;
 452                 case 0x07: // ATAN: Inverse Tangent of TOS. Result to TOS.
 453                         _apu_push_float(atan(_apu_pop_float()));
 454                         clocks = 5000;
 455                         break;
 456                 case 0x08: // LOG: Common Logarithm of TOS. Result to TOS.
 457                         f = _apu_pop_float();
 458                         if (f > 0) {
 459                                 _apu_push_float(log10(f));
 460                                 clocks = 5500;
 461                         } else {
 462                                 _apu_status |= _APU_F_NEGARG;
 463                                 _apu_move(4);
 464                                 clocks = 20;
 465                         }
 466                         break;
 467                 case 0x09: // LN: Natural Logarithm of TOS. Result to TOS.
 468                         f = _apu_pop_float();
 469                         if (f > 0) {
 470                                 _apu_push_float(log(f));
 471                                 clocks = 5500;
 472                         } else {
 473                                 _apu_status |= _APU_F_NEGARG;
 474                                 _apu_move(4);
 475                                 clocks = 20;
 476                         }
 477                         break;
 478                 case 0x0A: // EXP: "e" raised to power in TOS. Result to TOS.
 479                         f = _apu_pop_float();
 480                         _apu_push_float(pow(M_E, f));
 481                         clocks = (f > 32 ? 34 : 4000);
 482                         break;
 483                 case 0x0B: // PWR: NOS raised to power in TOS. Result to TOS. Pop Stack.
 484                         f = _apu_pop_float();
 485                         _apu_push_float(pow(_apu_pop_float(), f));
 486                         clocks = 10000;
 487                         break;
 488                 /* ------------------------------------------------ */
 489                 /* ---- data and stack manipulation operations ---- */
 490                 /* ------------------------------------------------ */
 491                 case 0x00: // NOP: does nothing (but clears status, however it's the first instruction done in the main func already
 492                         clocks = 4;
 493                         break;
 494
 495                 case 0x1F: // FIXS: Convert TOS from floating point format to fixed point format (16 bit).
 496                         _apu_push_fix16(_apu_pop_float());
 497                         clocks = 150;
 498                         break;
 499                 case 0x1E: // FIXD: Convert TOS from floating point format to fixed point format (32 bit).
 500                         _apu_push_fix32(_apu_pop_float());
 501                         clocks = 200;
 502                         break;
 503                 case 0x1D: // FLTS: Convert TOS from fixed point format (16 bit) to floating point format.
 504                         _apu_push_float(_apu_pop_fix16());
 505                         clocks = 100;
 506                         break;
 507                 case 0x1C: // FLTD: Convert TOS from fixed point format (32 bit) to floating point format.
 508                         _apu_push_float(_apu_pop_fix32());
 509                         clocks = 200;
 510                         break;
 511
 512                 case 0x74: // CHSS: Change sign of fixed point (16 bit) operand on TOS.
 513                         _apu_push_fix16(-_apu_pop_fix16());
 514                         clocks = 23;
 515                         break;
 516                 case 0x34: // CHSD: Change sign of fixed point (32 bit) operand on TOS.
 517                         _apu_push_fix32(-_apu_pop_fix32());
 518                         clocks = 27;
 519                         break;
 520                 case 0x15: // CHSF: Change sign of floating point operand on TOS. Note: that does not seem to be a big issue, as a single bit should be modified??
 521                         if (_apu_look8(1) & 128) { // if number is not zero
 522                                 _apu_stack[_apu_tos] ^= 128;
 523                                 if (_apu_stack[_apu_tos] & 128) _apu_status |= _APU_F_SIGN;
 524                         } else // if number is zero, nothing happens (but we sets zero flag)
 525                                 _apu_status |= _APU_F_ZERO;
 526                         clocks = 18;
 527                         break;
 528
 529
 530                 case 0x77: // PTOS: Push stack. Duplicate NOS to TOS.
 531                         _apu_move(2);
 532                         _apu_copy(2, 0);
 533                         _apu_copy(3, 1);
 534                         _apu_sz_fix16();
 535                         clocks = 16;
 536                         break;
 537                 case 0x37: // PTOD: Push stack. Duplicate NOS to TOS.
 538                         _apu_move(4);
 539                         _apu_copy(4, 0);
 540                         _apu_copy(5, 1);
 541                         _apu_copy(6, 2);
 542                         _apu_copy(7, 3);
 543                         _apu_sz_fix32();
 544                         clocks = 20;
 545                         break;
 546                 case 0x17: // PTOF: Push stack. Duplicate NOS to TOS.
 547                         _apu_move(4);
 548                         _apu_copy(4, 0);
 549                         _apu_copy(5, 1);
 550                         _apu_copy(6, 2);
 551                         _apu_copy(7, 3);
 552                         _apu_sz_float();
 553                         clocks = 20;
 554                         break;
 555
 556                 case 0x78: // POPS: Pop stack. Old NOS becomes new TOS, old TOS rotates to bottom.
 557                         _apu_move(-2);
 558                         _apu_sz_fix16(); // set "sz" (S and Z status flags) by inspecting (new) TOS
 559                         clocks = 10;
 560                         break;
 561                 case 0x38: // POPD: Pop stack. Old NOS becomes new TOS, old TOS rotates to bottom.
 562                         _apu_move(-4);
 563                         _apu_sz_fix32();
 564                         clocks = 12;
 565                         break;
 566                 case 0x18: // POPF: Pop stack. Old NOS becomes new TOS, old TOS rotates to bottom.
 567                         _apu_move(-4);
 568                         _apu_sz_float();
 569                         clocks = 12;
 570                         break;
 571
 572                 case 0x79: // XCHS: Exchange NOS and TOS. (16 bit fixed)
 573                         _apu_xchg(0, 2);
 574                         _apu_xchg(1, 3);
 575                         _apu_sz_fix16();
 576                         clocks = 18;
 577                         break;
 578                 case 0x39: // XCHD: Exchange NOS and TOS. (32 bit fixed)
 579                         _apu_xchg(0, 4);
 580                         _apu_xchg(1, 5);
 581                         _apu_xchg(2, 6);
 582                         _apu_xchg(3, 7);
 583                         _apu_sz_fix32();
 584                         clocks = 26;
 585                         break;
 586                 case 0x19: // XCHF: Exchange NOS and TOS. (float stuff)
 587                         _apu_xchg(0, 4);
 588                         _apu_xchg(1, 5);
 589                         _apu_xchg(2, 6);
 590                         _apu_xchg(3, 7);
 591                         _apu_sz_float();
 592                         clocks = 26;
 593                         break;
 594
 595                 case 0x1A: // PUPI: Push floating point constant PI onto TOS. Previous TOS becomes NOS.
 596                         _apu_push8(0xDA);
 597                         _apu_push8(0x0F);
 598                         _apu_push8(0xC9);
 599                         _apu_push8(0x02);
 600                         clocks = 16;
 601                         break;
 602
 603                 default:
 604 //                      DEBUG("APU: not implemented/unknown Am9511 command: %02Xh" NL, cmd);
 605                         clocks = 4; // no idea what happens.
 606                         break;
 607         }
 608 //      clocks *= CPU_CLOCK;
 609 //      z80ex_w_states((clocks % APU_CLOCK) ? ((clocks / APU_CLOCK) + 1) : (clocks / APU_CLOCK));
 610
 611         // set busy flag
 612         if(clocks > 0) {
 613                 if(register_id != -1) {
 614                         cancel_event(this, register_id);
 615                 }
 616                 register_event(this, 0, (1000000.0 * clocks) / _APU_CLOCK, false, &register_id);
 617
 618                 _apu_status |= _APU_F_BUSY;
 619         }
 620 }
 621
 622 void AM9511::initialize()
 623 {
 624         DEVICE::initialize();
 625         if(osd->check_feature(_T("APU_CLOCK"))) {
 626                 _APU_CLOCK = (double)osd->get_feature_int_value(_T("APU_CLOCK"));
 627         } else {
 628                 _APU_CLOCK = (double)osd->get_feature_int_value(_T("CPU_CLOCKS"));
 629         }
 630 }
 631
 632 void AM9511::reset()
 633 {
 634         apu_reset();
 635         register_id = -1;
 636 }
 637
 638 void AM9511::write_io8(uint32_t addr, uint32_t data)
 639 {
 640         if(addr & 1) {
 641 //              if(!(_apu_status & _APU_F_BUSY)) {
 642                         apu_write_command(data);
 643 //              }
 644         } else {
 645 //              if(!(_apu_status & _APU_F_BUSY)) {
 646                         apu_write_data(data);
 647 //              }
 648         }
 649 }
 650
 651 uint32_t AM9511::read_io8(uint32_t addr)
 652 {
 653         if(addr & 1) {
 654                 return apu_read_status();
 655         } else {
 656 //              if(!(_apu_status & _APU_F_BUSY)) {
 657                         return apu_read_data();
 658 //              }
 659 //              return 0;
 660         }
 661 }
 662
 663 void AM9511::event_callback(int event_id, int err)
 664 {
 665         // clear busy flag
 666         register_id = -1;
 667         _apu_status &= ~_APU_F_BUSY;
 668 }
 669
 670 #define STATE_VERSION   1
 671
 672 bool AM9511::process_state(FILEIO* state_fio, bool loading)
 673 {
 674         if(!state_fio->StateCheckUint32(STATE_VERSION)) {
 675                 return false;
 676         }
 677         if(!state_fio->StateCheckInt32(this_device_id)) {
 678                 return false;
 679         }
 680         state_fio->StateBuffer(_apu_stack, sizeof(_apu_stack), 1);
 681         state_fio->StateInt32(_apu_tos);
 682         state_fio->StateUint8(_apu_status);
 683         state_fio->StateInt32(register_id);
 684         return true;
 685 }
 686