OSDN Git Service

[VM][I386] (Re-) Make functions i__FASTCALL.
authorK.Ohta <whatisthis.sowhat@gmail.com>
Thu, 27 Jun 2019 07:17:08 +0000 (16:17 +0900)
committerK.Ohta <whatisthis.sowhat@gmail.com>
Thu, 27 Jun 2019 07:17:08 +0000 (16:17 +0900)
source/src/vm/mame/emu/cpu/i386/i386.c
source/src/vm/mame/emu/cpu/i386/i386op16.c
source/src/vm/mame/emu/cpu/i386/i386op32.c
source/src/vm/mame/emu/cpu/i386/i386ops.c
source/src/vm/mame/emu/cpu/i386/i386ops.h
source/src/vm/mame/emu/cpu/i386/i386priv.h
source/src/vm/mame/emu/cpu/i386/i486ops.c
source/src/vm/mame/emu/cpu/i386/pentops.c
source/src/vm/mame/emu/cpu/i386/x87ops.c

index 0a11357..d774237 100644 (file)
@@ -42,7 +42,7 @@ static void cpu_reset_generic(i386_state *cpustate);
 int i386_parity_table[256];
 MODRM_TABLE i386_MODRM_table[256];
 
-static void i386_trap_with_error(i386_state* cpustate, int irq, int irq_gate, int trap_level, UINT32 err, int is_top);
+static void __FASTCALL i386_trap_with_error(i386_state* cpustate, int irq, int irq_gate, int trap_level, UINT32 err, int is_top);
 static void i286_task_switch(i386_state* cpustate, UINT16 selector, UINT8 nested);
 static void i386_task_switch(i386_state* cpustate, UINT16 selector, UINT8 nested);
 static void build_opcode_table(i386_state *cpustate, UINT32 features);
@@ -488,7 +488,7 @@ static UINT32 i386_get_stack_ptr(i386_state* cpustate, UINT8 privilege)
 }
 
 /* Check segment register for validity when changing privilege level after an RETF */
-/*static*/ INLINE void i386_check_sreg_validity(i386_state* cpustate, int reg)
+/*static*/ INLINE void __FASTCALL i386_check_sreg_validity(i386_state* cpustate, int reg)
 {
        UINT16 selector = cpustate->sreg[reg].selector;
        UINT8 CPL = cpustate->CPL;
@@ -560,7 +560,7 @@ static int i386_limit_check(i386_state *cpustate, int seg, UINT32 offset, UINT32
        return 0;
 }
 #endif
-static void i386_sreg_load(i386_state *cpustate, UINT16 selector, UINT8 reg, bool *fault)
+static void __FASTCALL i386_sreg_load(i386_state *cpustate, UINT16 selector, UINT8 reg, bool *fault)
 {
        // Checks done when MOV changes a segment register in protected mode
        UINT8 CPL,RPL,DPL;
@@ -701,7 +701,7 @@ static void i386_sreg_load(i386_state *cpustate, UINT16 selector, UINT8 reg, boo
        if(fault) *fault = false;
 }
 
-static void i386_trap(i386_state *cpustate,int irq, int irq_gate, int trap_level)
+static void __FASTCALL i386_trap(i386_state *cpustate,int irq, int irq_gate, int trap_level)
 {
        /*  I386 Interrupts/Traps/Faults:
         *
@@ -1179,7 +1179,7 @@ static void i386_trap(i386_state *cpustate,int irq, int irq_gate, int trap_level
 
 }
 
-static void i386_trap_with_error(i386_state *cpustate,int irq, int irq_gate, int trap_level, UINT32 error, int is_top)
+static void __FASTCALL i386_trap_with_error(i386_state *cpustate,int irq, int irq_gate, int trap_level, UINT32 error, int is_top)
 {
        // buffering direct call from trap.
        if(is_top != 0) {
@@ -3062,18 +3062,18 @@ static void __FASTCALL report_invalid_modrm(i386_state *cpustate, const char* op
 }
 
 /* Forward declarations */
-static void I386OP(decode_opcode)(i386_state *cpustate);
-static void I386OP(decode_two_byte)(i386_state *cpustate);
-static void I386OP(decode_three_byte38)(i386_state *cpustate);
-static void I386OP(decode_three_byte3a)(i386_state *cpustate);
-static void I386OP(decode_three_byte66)(i386_state *cpustate);
-static void I386OP(decode_three_bytef2)(i386_state *cpustate);
-static void I386OP(decode_three_bytef3)(i386_state *cpustate);
-static void I386OP(decode_four_byte3866)(i386_state *cpustate);
-static void I386OP(decode_four_byte3a66)(i386_state *cpustate);
-static void I386OP(decode_four_byte38f2)(i386_state *cpustate);
-static void I386OP(decode_four_byte3af2)(i386_state *cpustate);
-static void I386OP(decode_four_byte38f3)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_opcode)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_two_byte)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_three_byte38)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_three_byte3a)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_three_byte66)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_three_bytef2)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_three_bytef3)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_four_byte3866)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_four_byte3a66)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_four_byte38f2)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_four_byte3af2)(i386_state *cpustate);
+static void __FASTCALL I386OP(decode_four_byte38f3)(i386_state *cpustate);
 
 
 
@@ -3085,7 +3085,7 @@ static void I386OP(decode_four_byte38f3)(i386_state *cpustate);
 #include "x87ops.c"
 #include "i386ops.h"
 
-static void I386OP(decode_opcode)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_opcode)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
 
@@ -3099,7 +3099,7 @@ static void I386OP(decode_opcode)(i386_state *cpustate)
 }
 
 /* Two-byte opcode 0f xx */
-static void I386OP(decode_two_byte)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_two_byte)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
 
@@ -3113,7 +3113,7 @@ static void I386OP(decode_two_byte)(i386_state *cpustate)
 }
 
 /* Three-byte opcode 0f 38 xx */
-static void I386OP(decode_three_byte38)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_three_byte38)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
 
@@ -3124,7 +3124,7 @@ static void I386OP(decode_three_byte38)(i386_state *cpustate)
 }
 
 /* Three-byte opcode 0f 3a xx */
-static void I386OP(decode_three_byte3a)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_three_byte3a)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
 
@@ -3135,7 +3135,7 @@ static void I386OP(decode_three_byte3a)(i386_state *cpustate)
 }
 
 /* Three-byte opcode prefix 66 0f xx */
-static void I386OP(decode_three_byte66)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_three_byte66)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
        if( cpustate->operand_size )
@@ -3145,7 +3145,7 @@ static void I386OP(decode_three_byte66)(i386_state *cpustate)
 }
 
 /* Three-byte opcode prefix f2 0f xx */
-static void I386OP(decode_three_bytef2)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_three_bytef2)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
        if( cpustate->operand_size )
@@ -3155,7 +3155,7 @@ static void I386OP(decode_three_bytef2)(i386_state *cpustate)
 }
 
 /* Three-byte opcode prefix f3 0f */
-static void I386OP(decode_three_bytef3)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_three_bytef3)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
        if( cpustate->operand_size )
@@ -3165,7 +3165,7 @@ static void I386OP(decode_three_bytef3)(i386_state *cpustate)
 }
 
 /* Four-byte opcode prefix 66 0f 38 xx */
-static void I386OP(decode_four_byte3866)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_four_byte3866)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
        if (cpustate->operand_size)
@@ -3175,7 +3175,7 @@ static void I386OP(decode_four_byte3866)(i386_state *cpustate)
 }
 
 /* Four-byte opcode prefix 66 0f 3a xx */
-static void I386OP(decode_four_byte3a66)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_four_byte3a66)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
        if (cpustate->operand_size)
@@ -3185,7 +3185,7 @@ static void I386OP(decode_four_byte3a66)(i386_state *cpustate)
 }
 
 /* Four-byte opcode prefix f2 0f 38 xx */
-static void I386OP(decode_four_byte38f2)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_four_byte38f2)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
        if (cpustate->operand_size)
@@ -3195,7 +3195,7 @@ static void I386OP(decode_four_byte38f2)(i386_state *cpustate)
 }
 
 /* Four-byte opcode prefix f2 0f 3a xx */
-static void I386OP(decode_four_byte3af2)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_four_byte3af2)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
        if (cpustate->operand_size)
@@ -3205,7 +3205,7 @@ static void I386OP(decode_four_byte3af2)(i386_state *cpustate)
 }
 
 /* Four-byte opcode prefix f3 0f 38 xx */
-static void I386OP(decode_four_byte38f3)(i386_state *cpustate)
+static void __FASTCALL I386OP(decode_four_byte38f3)(i386_state *cpustate)
 {
        cpustate->opcode = FETCH(cpustate);
        if (cpustate->operand_size)
index 35e0358..272bc09 100644 (file)
@@ -1,6 +1,6 @@
 // license:BSD-3-Clause
 // copyright-holders:Ville Linde, Barry Rodewald, Carl, Philip Bennett
-static UINT16 I386OP(shift_rotate16)(i386_state *cpustate, UINT8 modrm, UINT32 value, UINT8 shift)
+static UINT16 __FASTCALL I386OP(shift_rotate16)(i386_state *cpustate, UINT8 modrm, UINT32 value, UINT8 shift)
 {
        UINT32 src = value & 0xffff;
        UINT16 dst = value;
@@ -142,7 +142,7 @@ static UINT16 I386OP(shift_rotate16)(i386_state *cpustate, UINT8 modrm, UINT32 v
 
 
 
-static void I386OP(adc_rm16_r16)(i386_state *cpustate)      // Opcode 0x11
+static void __FASTCALL I386OP(adc_rm16_r16)(i386_state *cpustate)      // Opcode 0x11
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -162,7 +162,7 @@ static void I386OP(adc_rm16_r16)(i386_state *cpustate)      // Opcode 0x11
        }
 }
 
-static void I386OP(adc_r16_rm16)(i386_state *cpustate)      // Opcode 0x13
+static void __FASTCALL I386OP(adc_r16_rm16)(i386_state *cpustate)      // Opcode 0x13
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -182,7 +182,7 @@ static void I386OP(adc_r16_rm16)(i386_state *cpustate)      // Opcode 0x13
        }
 }
 
-static void I386OP(adc_ax_i16)(i386_state *cpustate)        // Opcode 0x15
+static void __FASTCALL I386OP(adc_ax_i16)(i386_state *cpustate)        // Opcode 0x15
 {
        UINT16 src, dst;
        src = FETCH16(cpustate);
@@ -192,7 +192,7 @@ static void I386OP(adc_ax_i16)(i386_state *cpustate)        // Opcode 0x15
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(add_rm16_r16)(i386_state *cpustate)      // Opcode 0x01
+static void __FASTCALL I386OP(add_rm16_r16)(i386_state *cpustate)      // Opcode 0x01
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -212,7 +212,7 @@ static void I386OP(add_rm16_r16)(i386_state *cpustate)      // Opcode 0x01
        }
 }
 
-static void I386OP(add_r16_rm16)(i386_state *cpustate)      // Opcode 0x03
+static void __FASTCALL I386OP(add_r16_rm16)(i386_state *cpustate)      // Opcode 0x03
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -232,7 +232,7 @@ static void I386OP(add_r16_rm16)(i386_state *cpustate)      // Opcode 0x03
        }
 }
 
-static void I386OP(add_ax_i16)(i386_state *cpustate)        // Opcode 0x05
+static void __FASTCALL I386OP(add_ax_i16)(i386_state *cpustate)        // Opcode 0x05
 {
        UINT16 src, dst;
        src = FETCH16(cpustate);
@@ -242,7 +242,7 @@ static void I386OP(add_ax_i16)(i386_state *cpustate)        // Opcode 0x05
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(and_rm16_r16)(i386_state *cpustate)      // Opcode 0x21
+static void __FASTCALL I386OP(and_rm16_r16)(i386_state *cpustate)      // Opcode 0x21
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -262,7 +262,7 @@ static void I386OP(and_rm16_r16)(i386_state *cpustate)      // Opcode 0x21
        }
 }
 
-static void I386OP(and_r16_rm16)(i386_state *cpustate)      // Opcode 0x23
+static void __FASTCALL I386OP(and_r16_rm16)(i386_state *cpustate)      // Opcode 0x23
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -282,7 +282,7 @@ static void I386OP(and_r16_rm16)(i386_state *cpustate)      // Opcode 0x23
        }
 }
 
-static void I386OP(and_ax_i16)(i386_state *cpustate)        // Opcode 0x25
+static void __FASTCALL I386OP(and_ax_i16)(i386_state *cpustate)        // Opcode 0x25
 {
        UINT16 src, dst;
        src = FETCH16(cpustate);
@@ -292,7 +292,7 @@ static void I386OP(and_ax_i16)(i386_state *cpustate)        // Opcode 0x25
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(bsf_r16_rm16)(i386_state *cpustate)      // Opcode 0x0f bc
+static void __FASTCALL I386OP(bsf_r16_rm16)(i386_state *cpustate)      // Opcode 0x0f bc
 {
        UINT16 src, dst, temp;
        UINT8 modrm = FETCH(cpustate);
@@ -321,7 +321,7 @@ static void I386OP(bsf_r16_rm16)(i386_state *cpustate)      // Opcode 0x0f bc
        CYCLES(cpustate,CYCLES_BSF_BASE);
 }
 
-static void I386OP(bsr_r16_rm16)(i386_state *cpustate)      // Opcode 0x0f bd
+static void __FASTCALL I386OP(bsr_r16_rm16)(i386_state *cpustate)      // Opcode 0x0f bd
 {
        UINT16 src, dst, temp;
        UINT8 modrm = FETCH(cpustate);
@@ -351,7 +351,7 @@ static void I386OP(bsr_r16_rm16)(i386_state *cpustate)      // Opcode 0x0f bd
 }
 
 
-static void I386OP(bt_rm16_r16)(i386_state *cpustate)       // Opcode 0x0f a3
+static void __FASTCALL I386OP(bt_rm16_r16)(i386_state *cpustate)       // Opcode 0x0f a3
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -382,7 +382,7 @@ static void I386OP(bt_rm16_r16)(i386_state *cpustate)       // Opcode 0x0f a3
        }
 }
 
-static void I386OP(btc_rm16_r16)(i386_state *cpustate)      // Opcode 0x0f bb
+static void __FASTCALL I386OP(btc_rm16_r16)(i386_state *cpustate)      // Opcode 0x0f bb
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -417,7 +417,7 @@ static void I386OP(btc_rm16_r16)(i386_state *cpustate)      // Opcode 0x0f bb
        }
 }
 
-static void I386OP(btr_rm16_r16)(i386_state *cpustate)      // Opcode 0x0f b3
+static void __FASTCALL I386OP(btr_rm16_r16)(i386_state *cpustate)      // Opcode 0x0f b3
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -452,7 +452,7 @@ static void I386OP(btr_rm16_r16)(i386_state *cpustate)      // Opcode 0x0f b3
        }
 }
 
-static void I386OP(bts_rm16_r16)(i386_state *cpustate)      // Opcode 0x0f ab
+static void __FASTCALL I386OP(bts_rm16_r16)(i386_state *cpustate)      // Opcode 0x0f ab
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -487,7 +487,7 @@ static void I386OP(bts_rm16_r16)(i386_state *cpustate)      // Opcode 0x0f ab
        }
 }
 
-static void I386OP(call_abs16)(i386_state *cpustate)        // Opcode 0x9a
+static void __FASTCALL I386OP(call_abs16)(i386_state *cpustate)        // Opcode 0x9a
 {
        UINT16 offset = FETCH16(cpustate);
        UINT16 ptr = FETCH16(cpustate);
@@ -524,7 +524,7 @@ static void I386OP(call_abs16)(i386_state *cpustate)        // Opcode 0x9a
        CHANGE_PC(cpustate,cpustate->eip);
 }
 
-static void I386OP(call_rel16)(i386_state *cpustate)        // Opcode 0xe8
+static void __FASTCALL I386OP(call_rel16)(i386_state *cpustate)        // Opcode 0xe8
 {
        INT16 disp = FETCH16(cpustate);
 
@@ -541,13 +541,13 @@ static void I386OP(call_rel16)(i386_state *cpustate)        // Opcode 0xe8
        CYCLES(cpustate,CYCLES_CALL);       /* TODO: Timing = 7 + m */
 }
 
-static void I386OP(cbw)(i386_state *cpustate)               // Opcode 0x98
+static void __FASTCALL I386OP(cbw)(i386_state *cpustate)               // Opcode 0x98
 {
        REG16(AX) = (INT16)((INT8)REG8(AL));
        CYCLES(cpustate,CYCLES_CBW);
 }
 
-static void I386OP(cmp_rm16_r16)(i386_state *cpustate)      // Opcode 0x39
+static void __FASTCALL I386OP(cmp_rm16_r16)(i386_state *cpustate)      // Opcode 0x39
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -565,7 +565,7 @@ static void I386OP(cmp_rm16_r16)(i386_state *cpustate)      // Opcode 0x39
        }
 }
 
-static void I386OP(cmp_r16_rm16)(i386_state *cpustate)      // Opcode 0x3b
+static void __FASTCALL I386OP(cmp_r16_rm16)(i386_state *cpustate)      // Opcode 0x3b
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -583,7 +583,7 @@ static void I386OP(cmp_r16_rm16)(i386_state *cpustate)      // Opcode 0x3b
        }
 }
 
-static void I386OP(cmp_ax_i16)(i386_state *cpustate)        // Opcode 0x3d
+static void __FASTCALL I386OP(cmp_ax_i16)(i386_state *cpustate)        // Opcode 0x3d
 {
        UINT16 src, dst;
        src = FETCH16(cpustate);
@@ -592,7 +592,7 @@ static void I386OP(cmp_ax_i16)(i386_state *cpustate)        // Opcode 0x3d
        CYCLES(cpustate,CYCLES_CMP_IMM_ACC);
 }
 
-static void I386OP(cmpsw)(i386_state *cpustate)             // Opcode 0xa7
+static void __FASTCALL I386OP(cmpsw)(i386_state *cpustate)             // Opcode 0xa7
 {
        UINT32 eas, ead;
        UINT16 src, dst;
@@ -610,7 +610,7 @@ static void I386OP(cmpsw)(i386_state *cpustate)             // Opcode 0xa7
        CYCLES(cpustate,CYCLES_CMPS);
 }
 
-static void I386OP(cwd)(i386_state *cpustate)               // Opcode 0x99
+static void __FASTCALL I386OP(cwd)(i386_state *cpustate)               // Opcode 0x99
 {
        if( REG16(AX) & 0x8000 ) {
                REG16(DX) = 0xffff;
@@ -620,55 +620,55 @@ static void I386OP(cwd)(i386_state *cpustate)               // Opcode 0x99
        CYCLES(cpustate,CYCLES_CWD);
 }
 
-static void I386OP(dec_ax)(i386_state *cpustate)            // Opcode 0x48
+static void __FASTCALL I386OP(dec_ax)(i386_state *cpustate)            // Opcode 0x48
 {
        REG16(AX) = DEC16(cpustate, REG16(AX) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_cx)(i386_state *cpustate)            // Opcode 0x49
+static void __FASTCALL I386OP(dec_cx)(i386_state *cpustate)            // Opcode 0x49
 {
        REG16(CX) = DEC16(cpustate, REG16(CX) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_dx)(i386_state *cpustate)            // Opcode 0x4a
+static void __FASTCALL I386OP(dec_dx)(i386_state *cpustate)            // Opcode 0x4a
 {
        REG16(DX) = DEC16(cpustate, REG16(DX) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_bx)(i386_state *cpustate)            // Opcode 0x4b
+static void __FASTCALL I386OP(dec_bx)(i386_state *cpustate)            // Opcode 0x4b
 {
        REG16(BX) = DEC16(cpustate, REG16(BX) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_sp)(i386_state *cpustate)            // Opcode 0x4c
+static void __FASTCALL I386OP(dec_sp)(i386_state *cpustate)            // Opcode 0x4c
 {
        REG16(SP) = DEC16(cpustate, REG16(SP) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_bp)(i386_state *cpustate)            // Opcode 0x4d
+static void __FASTCALL I386OP(dec_bp)(i386_state *cpustate)            // Opcode 0x4d
 {
        REG16(BP) = DEC16(cpustate, REG16(BP) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_si)(i386_state *cpustate)            // Opcode 0x4e
+static void __FASTCALL I386OP(dec_si)(i386_state *cpustate)            // Opcode 0x4e
 {
        REG16(SI) = DEC16(cpustate, REG16(SI) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_di)(i386_state *cpustate)            // Opcode 0x4f
+static void __FASTCALL I386OP(dec_di)(i386_state *cpustate)            // Opcode 0x4f
 {
        REG16(DI) = DEC16(cpustate, REG16(DI) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(imul_r16_rm16)(i386_state *cpustate)     // Opcode 0x0f af
+static void __FASTCALL I386OP(imul_r16_rm16)(i386_state *cpustate)     // Opcode 0x0f af
 {
        UINT8 modrm = FETCH(cpustate);
        INT32 result;
@@ -690,7 +690,7 @@ static void I386OP(imul_r16_rm16)(i386_state *cpustate)     // Opcode 0x0f af
        cpustate->CF = cpustate->OF = !(result == (INT32)(INT16)result);
 }
 
-static void I386OP(imul_r16_rm16_i16)(i386_state *cpustate) // Opcode 0x69
+static void __FASTCALL I386OP(imul_r16_rm16_i16)(i386_state *cpustate) // Opcode 0x69
 {
        UINT8 modrm = FETCH(cpustate);
        INT32 result;
@@ -712,7 +712,7 @@ static void I386OP(imul_r16_rm16_i16)(i386_state *cpustate) // Opcode 0x69
        cpustate->CF = cpustate->OF = !(result == (INT32)(INT16)result);
 }
 
-static void I386OP(imul_r16_rm16_i8)(i386_state *cpustate)  // Opcode 0x6b
+static void __FASTCALL I386OP(imul_r16_rm16_i8)(i386_state *cpustate)  // Opcode 0x6b
 {
        UINT8 modrm = FETCH(cpustate);
        INT32 result;
@@ -734,7 +734,7 @@ static void I386OP(imul_r16_rm16_i8)(i386_state *cpustate)  // Opcode 0x6b
        cpustate->CF = cpustate->OF = !(result == (INT32)(INT16)result);
 }
 
-static void I386OP(in_ax_i8)(i386_state *cpustate)          // Opcode 0xe5
+static void __FASTCALL I386OP(in_ax_i8)(i386_state *cpustate)          // Opcode 0xe5
 {
        UINT16 port = FETCH(cpustate);
        UINT16 data = READPORT16(cpustate, port);
@@ -742,7 +742,7 @@ static void I386OP(in_ax_i8)(i386_state *cpustate)          // Opcode 0xe5
        CYCLES(cpustate,CYCLES_IN_VAR);
 }
 
-static void I386OP(in_ax_dx)(i386_state *cpustate)          // Opcode 0xed
+static void __FASTCALL I386OP(in_ax_dx)(i386_state *cpustate)          // Opcode 0xed
 {
        UINT16 port = REG16(DX);
        UINT16 data = READPORT16(cpustate, port);
@@ -750,55 +750,55 @@ static void I386OP(in_ax_dx)(i386_state *cpustate)          // Opcode 0xed
        CYCLES(cpustate,CYCLES_IN);
 }
 
-static void I386OP(inc_ax)(i386_state *cpustate)            // Opcode 0x40
+static void __FASTCALL I386OP(inc_ax)(i386_state *cpustate)            // Opcode 0x40
 {
        REG16(AX) = INC16(cpustate, REG16(AX) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_cx)(i386_state *cpustate)            // Opcode 0x41
+static void __FASTCALL I386OP(inc_cx)(i386_state *cpustate)            // Opcode 0x41
 {
        REG16(CX) = INC16(cpustate, REG16(CX) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_dx)(i386_state *cpustate)            // Opcode 0x42
+static void __FASTCALL I386OP(inc_dx)(i386_state *cpustate)            // Opcode 0x42
 {
        REG16(DX) = INC16(cpustate, REG16(DX) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_bx)(i386_state *cpustate)            // Opcode 0x43
+static void __FASTCALL I386OP(inc_bx)(i386_state *cpustate)            // Opcode 0x43
 {
        REG16(BX) = INC16(cpustate, REG16(BX) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_sp)(i386_state *cpustate)            // Opcode 0x44
+static void __FASTCALL I386OP(inc_sp)(i386_state *cpustate)            // Opcode 0x44
 {
        REG16(SP) = INC16(cpustate, REG16(SP) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_bp)(i386_state *cpustate)            // Opcode 0x45
+static void __FASTCALL I386OP(inc_bp)(i386_state *cpustate)            // Opcode 0x45
 {
        REG16(BP) = INC16(cpustate, REG16(BP) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_si)(i386_state *cpustate)            // Opcode 0x46
+static void __FASTCALL I386OP(inc_si)(i386_state *cpustate)            // Opcode 0x46
 {
        REG16(SI) = INC16(cpustate, REG16(SI) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_di)(i386_state *cpustate)            // Opcode 0x47
+static void __FASTCALL I386OP(inc_di)(i386_state *cpustate)            // Opcode 0x47
 {
        REG16(DI) = INC16(cpustate, REG16(DI) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(iret16)(i386_state *cpustate)            // Opcode 0xcf
+static void __FASTCALL I386OP(iret16)(i386_state *cpustate)            // Opcode 0xcf
 {
        if( PROTECTED_MODE )
        {
@@ -817,7 +817,7 @@ static void I386OP(iret16)(i386_state *cpustate)            // Opcode 0xcf
        CYCLES(cpustate,CYCLES_IRET);
 }
 
-static void I386OP(ja_rel16)(i386_state *cpustate)          // Opcode 0x0f 87
+static void __FASTCALL I386OP(ja_rel16)(i386_state *cpustate)          // Opcode 0x0f 87
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->CF == 0 && cpustate->ZF == 0 ) {
@@ -836,7 +836,7 @@ static void I386OP(ja_rel16)(i386_state *cpustate)          // Opcode 0x0f 87
        }
 }
 
-static void I386OP(jbe_rel16)(i386_state *cpustate)         // Opcode 0x0f 86
+static void __FASTCALL I386OP(jbe_rel16)(i386_state *cpustate)         // Opcode 0x0f 86
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->CF != 0 || cpustate->ZF != 0 ) {
@@ -855,7 +855,7 @@ static void I386OP(jbe_rel16)(i386_state *cpustate)         // Opcode 0x0f 86
        }
 }
 
-static void I386OP(jc_rel16)(i386_state *cpustate)          // Opcode 0x0f 82
+static void __FASTCALL I386OP(jc_rel16)(i386_state *cpustate)          // Opcode 0x0f 82
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->CF != 0 ) {
@@ -874,7 +874,7 @@ static void I386OP(jc_rel16)(i386_state *cpustate)          // Opcode 0x0f 82
        }
 }
 
-static void I386OP(jg_rel16)(i386_state *cpustate)          // Opcode 0x0f 8f
+static void __FASTCALL I386OP(jg_rel16)(i386_state *cpustate)          // Opcode 0x0f 8f
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->ZF == 0 && (cpustate->SF == cpustate->OF) ) {
@@ -893,7 +893,7 @@ static void I386OP(jg_rel16)(i386_state *cpustate)          // Opcode 0x0f 8f
        }
 }
 
-static void I386OP(jge_rel16)(i386_state *cpustate)         // Opcode 0x0f 8d
+static void __FASTCALL I386OP(jge_rel16)(i386_state *cpustate)         // Opcode 0x0f 8d
 {
        INT16 disp = FETCH16(cpustate);
        if(cpustate->SF == cpustate->OF) {
@@ -912,7 +912,7 @@ static void I386OP(jge_rel16)(i386_state *cpustate)         // Opcode 0x0f 8d
        }
 }
 
-static void I386OP(jl_rel16)(i386_state *cpustate)          // Opcode 0x0f 8c
+static void __FASTCALL I386OP(jl_rel16)(i386_state *cpustate)          // Opcode 0x0f 8c
 {
        INT16 disp = FETCH16(cpustate);
        if( (cpustate->SF != cpustate->OF) ) {
@@ -931,7 +931,7 @@ static void I386OP(jl_rel16)(i386_state *cpustate)          // Opcode 0x0f 8c
        }
 }
 
-static void I386OP(jle_rel16)(i386_state *cpustate)         // Opcode 0x0f 8e
+static void __FASTCALL I386OP(jle_rel16)(i386_state *cpustate)         // Opcode 0x0f 8e
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->ZF != 0 || (cpustate->SF != cpustate->OF) ) {
@@ -950,7 +950,7 @@ static void I386OP(jle_rel16)(i386_state *cpustate)         // Opcode 0x0f 8e
        }
 }
 
-static void I386OP(jnc_rel16)(i386_state *cpustate)         // Opcode 0x0f 83
+static void __FASTCALL I386OP(jnc_rel16)(i386_state *cpustate)         // Opcode 0x0f 83
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->CF == 0 ) {
@@ -969,7 +969,7 @@ static void I386OP(jnc_rel16)(i386_state *cpustate)         // Opcode 0x0f 83
        }
 }
 
-static void I386OP(jno_rel16)(i386_state *cpustate)         // Opcode 0x0f 81
+static void __FASTCALL I386OP(jno_rel16)(i386_state *cpustate)         // Opcode 0x0f 81
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->OF == 0 ) {
@@ -988,7 +988,7 @@ static void I386OP(jno_rel16)(i386_state *cpustate)         // Opcode 0x0f 81
        }
 }
 
-static void I386OP(jnp_rel16)(i386_state *cpustate)         // Opcode 0x0f 8b
+static void __FASTCALL I386OP(jnp_rel16)(i386_state *cpustate)         // Opcode 0x0f 8b
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->PF == 0 ) {
@@ -1007,7 +1007,7 @@ static void I386OP(jnp_rel16)(i386_state *cpustate)         // Opcode 0x0f 8b
        }
 }
 
-static void I386OP(jns_rel16)(i386_state *cpustate)         // Opcode 0x0f 89
+static void __FASTCALL I386OP(jns_rel16)(i386_state *cpustate)         // Opcode 0x0f 89
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->SF == 0 ) {
@@ -1026,7 +1026,7 @@ static void I386OP(jns_rel16)(i386_state *cpustate)         // Opcode 0x0f 89
        }
 }
 
-static void I386OP(jnz_rel16)(i386_state *cpustate)         // Opcode 0x0f 85
+static void __FASTCALL I386OP(jnz_rel16)(i386_state *cpustate)         // Opcode 0x0f 85
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->ZF == 0 ) {
@@ -1045,7 +1045,7 @@ static void I386OP(jnz_rel16)(i386_state *cpustate)         // Opcode 0x0f 85
        }
 }
 
-static void I386OP(jo_rel16)(i386_state *cpustate)          // Opcode 0x0f 80
+static void __FASTCALL I386OP(jo_rel16)(i386_state *cpustate)          // Opcode 0x0f 80
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->OF != 0 ) {
@@ -1064,7 +1064,7 @@ static void I386OP(jo_rel16)(i386_state *cpustate)          // Opcode 0x0f 80
        }
 }
 
-static void I386OP(jp_rel16)(i386_state *cpustate)          // Opcode 0x0f 8a
+static void __FASTCALL I386OP(jp_rel16)(i386_state *cpustate)          // Opcode 0x0f 8a
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->PF != 0 ) {
@@ -1083,7 +1083,7 @@ static void I386OP(jp_rel16)(i386_state *cpustate)          // Opcode 0x0f 8a
        }
 }
 
-static void I386OP(js_rel16)(i386_state *cpustate)          // Opcode 0x0f 88
+static void __FASTCALL I386OP(js_rel16)(i386_state *cpustate)          // Opcode 0x0f 88
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->SF != 0 ) {
@@ -1102,7 +1102,7 @@ static void I386OP(js_rel16)(i386_state *cpustate)          // Opcode 0x0f 88
        }
 }
 
-static void I386OP(jz_rel16)(i386_state *cpustate)          // Opcode 0x0f 84
+static void __FASTCALL I386OP(jz_rel16)(i386_state *cpustate)          // Opcode 0x0f 84
 {
        INT16 disp = FETCH16(cpustate);
        if( cpustate->ZF != 0 ) {
@@ -1121,7 +1121,7 @@ static void I386OP(jz_rel16)(i386_state *cpustate)          // Opcode 0x0f 84
        }
 }
 
-static void I386OP(jcxz16)(i386_state *cpustate)            // Opcode 0xe3
+static void __FASTCALL I386OP(jcxz16)(i386_state *cpustate)            // Opcode 0xe3
 {
        INT8 disp = FETCH(cpustate);
        int val = (cpustate->address_size)?(REG32(ECX) == 0):(REG16(CX) == 0);
@@ -1141,7 +1141,7 @@ static void I386OP(jcxz16)(i386_state *cpustate)            // Opcode 0xe3
        }
 }
 
-static void I386OP(jmp_rel16)(i386_state *cpustate)         // Opcode 0xe9
+static void __FASTCALL I386OP(jmp_rel16)(i386_state *cpustate)         // Opcode 0xe9
 {
        INT16 disp = FETCH16(cpustate);
 
@@ -1157,7 +1157,7 @@ static void I386OP(jmp_rel16)(i386_state *cpustate)         // Opcode 0xe9
        CYCLES(cpustate,CYCLES_JMP);        /* TODO: Timing = 7 + m */
 }
 
-static void I386OP(jmp_abs16)(i386_state *cpustate)         // Opcode 0xea
+static void __FASTCALL I386OP(jmp_abs16)(i386_state *cpustate)         // Opcode 0xea
 {
        UINT16 address = FETCH16(cpustate);
        UINT16 segment = FETCH16(cpustate);
@@ -1177,7 +1177,7 @@ static void I386OP(jmp_abs16)(i386_state *cpustate)         // Opcode 0xea
        CYCLES(cpustate,CYCLES_JMP_INTERSEG);
 }
 
-static void I386OP(lea16)(i386_state *cpustate)             // Opcode 0x8d
+static void __FASTCALL I386OP(lea16)(i386_state *cpustate)             // Opcode 0x8d
 {
        UINT8 modrm = FETCH(cpustate);
        UINT32 ea = GetNonTranslatedEA(cpustate,modrm,NULL);
@@ -1185,7 +1185,7 @@ static void I386OP(lea16)(i386_state *cpustate)             // Opcode 0x8d
        CYCLES(cpustate,CYCLES_LEA);
 }
 
-static void I386OP(enter16)(i386_state *cpustate)           // Opcode 0xc8
+static void __FASTCALL I386OP(enter16)(i386_state *cpustate)           // Opcode 0xc8
 {
        UINT16 framesize = FETCH16(cpustate);
        UINT8 level = FETCH(cpustate) % 32;
@@ -1215,7 +1215,7 @@ static void I386OP(enter16)(i386_state *cpustate)           // Opcode 0xc8
        CYCLES(cpustate,CYCLES_ENTER);
 }
 
-static void I386OP(leave16)(i386_state *cpustate)           // Opcode 0xc9
+static void __FASTCALL I386OP(leave16)(i386_state *cpustate)           // Opcode 0xc9
 {
        if(!STACK_32BIT)
                REG16(SP) = REG16(BP);
@@ -1225,7 +1225,7 @@ static void I386OP(leave16)(i386_state *cpustate)           // Opcode 0xc9
        CYCLES(cpustate,CYCLES_LEAVE);
 }
 
-static void I386OP(lodsw)(i386_state *cpustate)             // Opcode 0xad
+static void __FASTCALL I386OP(lodsw)(i386_state *cpustate)             // Opcode 0xad
 {
        UINT32 eas;
        if( cpustate->segment_prefix ) {
@@ -1238,7 +1238,7 @@ static void I386OP(lodsw)(i386_state *cpustate)             // Opcode 0xad
        CYCLES(cpustate,CYCLES_LODS);
 }
 
-static void I386OP(loop16)(i386_state *cpustate)            // Opcode 0xe2
+static void __FASTCALL I386OP(loop16)(i386_state *cpustate)            // Opcode 0xe2
 {
        INT8 disp = FETCH(cpustate);
        INT32 val = (cpustate->address_size)?(--REG32(ECX)):(--REG16(CX));
@@ -1256,7 +1256,7 @@ static void I386OP(loop16)(i386_state *cpustate)            // Opcode 0xe2
        CYCLES(cpustate,CYCLES_LOOP);       /* TODO: Timing = 11 + m */
 }
 
-static void I386OP(loopne16)(i386_state *cpustate)          // Opcode 0xe0
+static void __FASTCALL I386OP(loopne16)(i386_state *cpustate)          // Opcode 0xe0
 {
        INT8 disp = FETCH(cpustate);
        INT32 val = (cpustate->address_size)?(--REG32(ECX)):(--REG16(CX));
@@ -1274,7 +1274,7 @@ static void I386OP(loopne16)(i386_state *cpustate)          // Opcode 0xe0
        CYCLES(cpustate,CYCLES_LOOPNZ);     /* TODO: Timing = 11 + m */
 }
 
-static void I386OP(loopz16)(i386_state *cpustate)           // Opcode 0xe1
+static void __FASTCALL I386OP(loopz16)(i386_state *cpustate)           // Opcode 0xe1
 {
        INT8 disp = FETCH(cpustate);
        INT32 val = (cpustate->address_size)?(--REG32(ECX)):(--REG16(CX));
@@ -1292,7 +1292,7 @@ static void I386OP(loopz16)(i386_state *cpustate)           // Opcode 0xe1
        CYCLES(cpustate,CYCLES_LOOPZ);      /* TODO: Timing = 11 + m */
 }
 
-static void I386OP(mov_rm16_r16)(i386_state *cpustate)      // Opcode 0x89
+static void __FASTCALL I386OP(mov_rm16_r16)(i386_state *cpustate)      // Opcode 0x89
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -1308,7 +1308,7 @@ static void I386OP(mov_rm16_r16)(i386_state *cpustate)      // Opcode 0x89
        }
 }
 
-static void I386OP(mov_r16_rm16)(i386_state *cpustate)      // Opcode 0x8b
+static void __FASTCALL I386OP(mov_r16_rm16)(i386_state *cpustate)      // Opcode 0x8b
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -1324,7 +1324,7 @@ static void I386OP(mov_r16_rm16)(i386_state *cpustate)      // Opcode 0x8b
        }
 }
 
-static void I386OP(mov_rm16_i16)(i386_state *cpustate)      // Opcode 0xc7
+static void __FASTCALL I386OP(mov_rm16_i16)(i386_state *cpustate)      // Opcode 0xc7
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -1339,7 +1339,7 @@ static void I386OP(mov_rm16_i16)(i386_state *cpustate)      // Opcode 0xc7
        }
 }
 
-static void I386OP(mov_ax_m16)(i386_state *cpustate)        // Opcode 0xa1
+static void __FASTCALL I386OP(mov_ax_m16)(i386_state *cpustate)        // Opcode 0xa1
 {
        UINT32 offset, ea;
        if( cpustate->address_size ) {
@@ -1357,7 +1357,7 @@ static void I386OP(mov_ax_m16)(i386_state *cpustate)        // Opcode 0xa1
        CYCLES(cpustate,CYCLES_MOV_MEM_ACC);
 }
 
-static void I386OP(mov_m16_ax)(i386_state *cpustate)        // Opcode 0xa3
+static void __FASTCALL I386OP(mov_m16_ax)(i386_state *cpustate)        // Opcode 0xa3
 {
        UINT32 offset, ea;
        if( cpustate->address_size ) {
@@ -1375,55 +1375,55 @@ static void I386OP(mov_m16_ax)(i386_state *cpustate)        // Opcode 0xa3
        CYCLES(cpustate,CYCLES_MOV_ACC_MEM);
 }
 
-static void I386OP(mov_ax_i16)(i386_state *cpustate)        // Opcode 0xb8
+static void __FASTCALL I386OP(mov_ax_i16)(i386_state *cpustate)        // Opcode 0xb8
 {
        REG16(AX) = FETCH16(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_cx_i16)(i386_state *cpustate)        // Opcode 0xb9
+static void __FASTCALL I386OP(mov_cx_i16)(i386_state *cpustate)        // Opcode 0xb9
 {
        REG16(CX) = FETCH16(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_dx_i16)(i386_state *cpustate)        // Opcode 0xba
+static void __FASTCALL I386OP(mov_dx_i16)(i386_state *cpustate)        // Opcode 0xba
 {
        REG16(DX) = FETCH16(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_bx_i16)(i386_state *cpustate)        // Opcode 0xbb
+static void __FASTCALL I386OP(mov_bx_i16)(i386_state *cpustate)        // Opcode 0xbb
 {
        REG16(BX) = FETCH16(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_sp_i16)(i386_state *cpustate)        // Opcode 0xbc
+static void __FASTCALL I386OP(mov_sp_i16)(i386_state *cpustate)        // Opcode 0xbc
 {
        REG16(SP) = FETCH16(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_bp_i16)(i386_state *cpustate)        // Opcode 0xbd
+static void __FASTCALL I386OP(mov_bp_i16)(i386_state *cpustate)        // Opcode 0xbd
 {
        REG16(BP) = FETCH16(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_si_i16)(i386_state *cpustate)        // Opcode 0xbe
+static void __FASTCALL I386OP(mov_si_i16)(i386_state *cpustate)        // Opcode 0xbe
 {
        REG16(SI) = FETCH16(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_di_i16)(i386_state *cpustate)        // Opcode 0xbf
+static void __FASTCALL I386OP(mov_di_i16)(i386_state *cpustate)        // Opcode 0xbf
 {
        REG16(DI) = FETCH16(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(movsw)(i386_state *cpustate)             // Opcode 0xa5
+static void __FASTCALL I386OP(movsw)(i386_state *cpustate)             // Opcode 0xa5
 {
        UINT32 eas, ead;
        UINT16 v;
@@ -1440,7 +1440,7 @@ static void I386OP(movsw)(i386_state *cpustate)             // Opcode 0xa5
        CYCLES(cpustate,CYCLES_MOVS);
 }
 
-static void I386OP(movsx_r16_rm8)(i386_state *cpustate)     // Opcode 0x0f be
+static void __FASTCALL I386OP(movsx_r16_rm8)(i386_state *cpustate)     // Opcode 0x0f be
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -1455,7 +1455,7 @@ static void I386OP(movsx_r16_rm8)(i386_state *cpustate)     // Opcode 0x0f be
        }
 }
 
-static void I386OP(movzx_r16_rm8)(i386_state *cpustate)     // Opcode 0x0f b6
+static void __FASTCALL I386OP(movzx_r16_rm8)(i386_state *cpustate)     // Opcode 0x0f b6
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -1470,7 +1470,7 @@ static void I386OP(movzx_r16_rm8)(i386_state *cpustate)     // Opcode 0x0f b6
        }
 }
 
-static void I386OP(or_rm16_r16)(i386_state *cpustate)       // Opcode 0x09
+static void __FASTCALL I386OP(or_rm16_r16)(i386_state *cpustate)       // Opcode 0x09
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1490,7 +1490,7 @@ static void I386OP(or_rm16_r16)(i386_state *cpustate)       // Opcode 0x09
        }
 }
 
-static void I386OP(or_r16_rm16)(i386_state *cpustate)       // Opcode 0x0b
+static void __FASTCALL I386OP(or_r16_rm16)(i386_state *cpustate)       // Opcode 0x0b
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1510,7 +1510,7 @@ static void I386OP(or_r16_rm16)(i386_state *cpustate)       // Opcode 0x0b
        }
 }
 
-static void I386OP(or_ax_i16)(i386_state *cpustate)         // Opcode 0x0d
+static void __FASTCALL I386OP(or_ax_i16)(i386_state *cpustate)         // Opcode 0x0d
 {
        UINT16 src, dst;
        src = FETCH16(cpustate);
@@ -1520,7 +1520,7 @@ static void I386OP(or_ax_i16)(i386_state *cpustate)         // Opcode 0x0d
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(out_ax_i8)(i386_state *cpustate)         // Opcode 0xe7
+static void __FASTCALL I386OP(out_ax_i8)(i386_state *cpustate)         // Opcode 0xe7
 {
        UINT16 port = FETCH(cpustate);
        UINT16 data = REG16(AX);
@@ -1528,7 +1528,7 @@ static void I386OP(out_ax_i8)(i386_state *cpustate)         // Opcode 0xe7
        CYCLES(cpustate,CYCLES_OUT_VAR);
 }
 
-static void I386OP(out_ax_dx)(i386_state *cpustate)         // Opcode 0xef
+static void __FASTCALL I386OP(out_ax_dx)(i386_state *cpustate)         // Opcode 0xef
 {
        UINT16 port = REG16(DX);
        UINT16 data = REG16(AX);
@@ -1536,7 +1536,7 @@ static void I386OP(out_ax_dx)(i386_state *cpustate)         // Opcode 0xef
        CYCLES(cpustate,CYCLES_OUT);
 }
 
-static void I386OP(pop_ax)(i386_state *cpustate)            // Opcode 0x58
+static void __FASTCALL I386OP(pop_ax)(i386_state *cpustate)            // Opcode 0x58
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,2) == 0)
@@ -1548,7 +1548,7 @@ static void I386OP(pop_ax)(i386_state *cpustate)            // Opcode 0x58
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_cx)(i386_state *cpustate)            // Opcode 0x59
+static void __FASTCALL I386OP(pop_cx)(i386_state *cpustate)            // Opcode 0x59
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,2) == 0)
@@ -1560,7 +1560,7 @@ static void I386OP(pop_cx)(i386_state *cpustate)            // Opcode 0x59
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_dx)(i386_state *cpustate)            // Opcode 0x5a
+static void __FASTCALL I386OP(pop_dx)(i386_state *cpustate)            // Opcode 0x5a
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,2) == 0)
@@ -1572,7 +1572,7 @@ static void I386OP(pop_dx)(i386_state *cpustate)            // Opcode 0x5a
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_bx)(i386_state *cpustate)            // Opcode 0x5b
+static void __FASTCALL I386OP(pop_bx)(i386_state *cpustate)            // Opcode 0x5b
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,2) == 0)
@@ -1584,7 +1584,7 @@ static void I386OP(pop_bx)(i386_state *cpustate)            // Opcode 0x5b
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_sp)(i386_state *cpustate)            // Opcode 0x5c
+static void __FASTCALL I386OP(pop_sp)(i386_state *cpustate)            // Opcode 0x5c
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,2) == 0)
@@ -1596,7 +1596,7 @@ static void I386OP(pop_sp)(i386_state *cpustate)            // Opcode 0x5c
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_bp)(i386_state *cpustate)            // Opcode 0x5d
+static void __FASTCALL I386OP(pop_bp)(i386_state *cpustate)            // Opcode 0x5d
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,2) == 0)
@@ -1608,7 +1608,7 @@ static void I386OP(pop_bp)(i386_state *cpustate)            // Opcode 0x5d
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_si)(i386_state *cpustate)            // Opcode 0x5e
+static void __FASTCALL I386OP(pop_si)(i386_state *cpustate)            // Opcode 0x5e
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,2) == 0)
@@ -1620,7 +1620,7 @@ static void I386OP(pop_si)(i386_state *cpustate)            // Opcode 0x5e
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_di)(i386_state *cpustate)            // Opcode 0x5f
+static void __FASTCALL I386OP(pop_di)(i386_state *cpustate)            // Opcode 0x5f
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,2) == 0)
@@ -1659,27 +1659,27 @@ static bool I386OP(pop_seg16)(i386_state *cpustate, int segment)
        return true;
 }
 
-static void I386OP(pop_ds16)(i386_state *cpustate)          // Opcode 0x1f
+static void __FASTCALL I386OP(pop_ds16)(i386_state *cpustate)          // Opcode 0x1f
 {
        I386OP(pop_seg16)(cpustate, DS);
 }
 
-static void I386OP(pop_es16)(i386_state *cpustate)          // Opcode 0x07
+static void __FASTCALL I386OP(pop_es16)(i386_state *cpustate)          // Opcode 0x07
 {
        I386OP(pop_seg16)(cpustate, ES);
 }
 
-static void I386OP(pop_fs16)(i386_state *cpustate)          // Opcode 0x0f a1
+static void __FASTCALL I386OP(pop_fs16)(i386_state *cpustate)          // Opcode 0x0f a1
 {
        I386OP(pop_seg16)(cpustate, FS);
 }
 
-static void I386OP(pop_gs16)(i386_state *cpustate)          // Opcode 0x0f a9
+static void __FASTCALL I386OP(pop_gs16)(i386_state *cpustate)          // Opcode 0x0f a9
 {
        I386OP(pop_seg16)(cpustate, GS);
 }
 
-static void I386OP(pop_ss16)(i386_state *cpustate)          // Opcode 0x17
+static void __FASTCALL I386OP(pop_ss16)(i386_state *cpustate)          // Opcode 0x17
 {
        if(!I386OP(pop_seg16)(cpustate, SS)) return;
        if(cpustate->IF != 0) // if external interrupts are enabled
@@ -1689,7 +1689,7 @@ static void I386OP(pop_ss16)(i386_state *cpustate)          // Opcode 0x17
        }
 }
 
-static void I386OP(pop_rm16)(i386_state *cpustate)          // Opcode 0x8f
+static void __FASTCALL I386OP(pop_rm16)(i386_state *cpustate)          // Opcode 0x8f
 {
        UINT8 modrm = FETCH(cpustate);
        UINT16 value;
@@ -1721,7 +1721,7 @@ static void I386OP(pop_rm16)(i386_state *cpustate)          // Opcode 0x8f
        CYCLES(cpustate,CYCLES_POP_RM);
 }
 
-static void I386OP(popa)(i386_state *cpustate)              // Opcode 0x61
+static void __FASTCALL I386OP(popa)(i386_state *cpustate)              // Opcode 0x61
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
 
@@ -1741,7 +1741,7 @@ static void I386OP(popa)(i386_state *cpustate)              // Opcode 0x61
        CYCLES(cpustate,CYCLES_POPA);
 }
 
-static void I386OP(popf)(i386_state *cpustate)              // Opcode 0x9d
+static void __FASTCALL I386OP(popf)(i386_state *cpustate)              // Opcode 0x9d
 {
        UINT32 value;
        UINT32 current = get_flags(cpustate);
@@ -1802,7 +1802,7 @@ static void I386OP(popf)(i386_state *cpustate)              // Opcode 0x9d
        CYCLES(cpustate,CYCLES_POPF);
 }
 
-static void I386OP(push_ax)(i386_state *cpustate)           // Opcode 0x50
+static void __FASTCALL I386OP(push_ax)(i386_state *cpustate)           // Opcode 0x50
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1816,7 +1816,7 @@ static void I386OP(push_ax)(i386_state *cpustate)           // Opcode 0x50
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_cx)(i386_state *cpustate)           // Opcode 0x51
+static void __FASTCALL I386OP(push_cx)(i386_state *cpustate)           // Opcode 0x51
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1830,7 +1830,7 @@ static void I386OP(push_cx)(i386_state *cpustate)           // Opcode 0x51
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_dx)(i386_state *cpustate)           // Opcode 0x52
+static void __FASTCALL I386OP(push_dx)(i386_state *cpustate)           // Opcode 0x52
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1844,7 +1844,7 @@ static void I386OP(push_dx)(i386_state *cpustate)           // Opcode 0x52
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_bx)(i386_state *cpustate)           // Opcode 0x53
+static void __FASTCALL I386OP(push_bx)(i386_state *cpustate)           // Opcode 0x53
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1858,7 +1858,7 @@ static void I386OP(push_bx)(i386_state *cpustate)           // Opcode 0x53
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_sp)(i386_state *cpustate)           // Opcode 0x54
+static void __FASTCALL I386OP(push_sp)(i386_state *cpustate)           // Opcode 0x54
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1872,7 +1872,7 @@ static void I386OP(push_sp)(i386_state *cpustate)           // Opcode 0x54
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_bp)(i386_state *cpustate)           // Opcode 0x55
+static void __FASTCALL I386OP(push_bp)(i386_state *cpustate)           // Opcode 0x55
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1886,7 +1886,7 @@ static void I386OP(push_bp)(i386_state *cpustate)           // Opcode 0x55
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_si)(i386_state *cpustate)           // Opcode 0x56
+static void __FASTCALL I386OP(push_si)(i386_state *cpustate)           // Opcode 0x56
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1900,7 +1900,7 @@ static void I386OP(push_si)(i386_state *cpustate)           // Opcode 0x56
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_di)(i386_state *cpustate)           // Opcode 0x57
+static void __FASTCALL I386OP(push_di)(i386_state *cpustate)           // Opcode 0x57
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1914,7 +1914,7 @@ static void I386OP(push_di)(i386_state *cpustate)           // Opcode 0x57
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_cs16)(i386_state *cpustate)         // Opcode 0x0e
+static void __FASTCALL I386OP(push_cs16)(i386_state *cpustate)         // Opcode 0x0e
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1928,7 +1928,7 @@ static void I386OP(push_cs16)(i386_state *cpustate)         // Opcode 0x0e
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_ds16)(i386_state *cpustate)         // Opcode 0x1e
+static void __FASTCALL I386OP(push_ds16)(i386_state *cpustate)         // Opcode 0x1e
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1942,7 +1942,7 @@ static void I386OP(push_ds16)(i386_state *cpustate)         // Opcode 0x1e
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_es16)(i386_state *cpustate)         // Opcode 0x06
+static void __FASTCALL I386OP(push_es16)(i386_state *cpustate)         // Opcode 0x06
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1956,7 +1956,7 @@ static void I386OP(push_es16)(i386_state *cpustate)         // Opcode 0x06
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_fs16)(i386_state *cpustate)         // Opcode 0x0f a0
+static void __FASTCALL I386OP(push_fs16)(i386_state *cpustate)         // Opcode 0x0f a0
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1970,7 +1970,7 @@ static void I386OP(push_fs16)(i386_state *cpustate)         // Opcode 0x0f a0
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_gs16)(i386_state *cpustate)         // Opcode 0x0f a8
+static void __FASTCALL I386OP(push_gs16)(i386_state *cpustate)         // Opcode 0x0f a8
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1984,7 +1984,7 @@ static void I386OP(push_gs16)(i386_state *cpustate)         // Opcode 0x0f a8
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_ss16)(i386_state *cpustate)         // Opcode 0x16
+static void __FASTCALL I386OP(push_ss16)(i386_state *cpustate)         // Opcode 0x16
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1998,7 +1998,7 @@ static void I386OP(push_ss16)(i386_state *cpustate)         // Opcode 0x16
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_i16)(i386_state *cpustate)          // Opcode 0x68
+static void __FASTCALL I386OP(push_i16)(i386_state *cpustate)          // Opcode 0x68
 {
        UINT16 value = FETCH16(cpustate);
        UINT32 offset;
@@ -2013,7 +2013,7 @@ static void I386OP(push_i16)(i386_state *cpustate)          // Opcode 0x68
        CYCLES(cpustate,CYCLES_PUSH_IMM);
 }
 
-static void I386OP(pusha)(i386_state *cpustate)             // Opcode 0x60
+static void __FASTCALL I386OP(pusha)(i386_state *cpustate)             // Opcode 0x60
 {
        UINT16 temp = REG16(SP);
        UINT32 offset;
@@ -2037,7 +2037,7 @@ static void I386OP(pusha)(i386_state *cpustate)             // Opcode 0x60
        CYCLES(cpustate,CYCLES_PUSHA);
 }
 
-static void I386OP(pushf)(i386_state *cpustate)             // Opcode 0x9c
+static void __FASTCALL I386OP(pushf)(i386_state *cpustate)             // Opcode 0x9c
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -2055,7 +2055,7 @@ static void I386OP(pushf)(i386_state *cpustate)             // Opcode 0x9c
        CYCLES(cpustate,CYCLES_PUSHF);
 }
 
-static void I386OP(ret_near16_i16)(i386_state *cpustate)    // Opcode 0xc2
+static void __FASTCALL I386OP(ret_near16_i16)(i386_state *cpustate)    // Opcode 0xc2
 {
        INT16 disp = FETCH16(cpustate);
        cpustate->eip = POP16(cpustate);
@@ -2064,14 +2064,14 @@ static void I386OP(ret_near16_i16)(i386_state *cpustate)    // Opcode 0xc2
        CYCLES(cpustate,CYCLES_RET_IMM);        /* TODO: Timing = 10 + m */
 }
 
-static void I386OP(ret_near16)(i386_state *cpustate)        // Opcode 0xc3
+static void __FASTCALL I386OP(ret_near16)(i386_state *cpustate)        // Opcode 0xc3
 {
        cpustate->eip = POP16(cpustate);
        CHANGE_PC(cpustate,cpustate->eip);
        CYCLES(cpustate,CYCLES_RET);        /* TODO: Timing = 10 + m */
 }
 
-static void I386OP(sbb_rm16_r16)(i386_state *cpustate)      // Opcode 0x19
+static void __FASTCALL I386OP(sbb_rm16_r16)(i386_state *cpustate)      // Opcode 0x19
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2091,7 +2091,7 @@ static void I386OP(sbb_rm16_r16)(i386_state *cpustate)      // Opcode 0x19
        }
 }
 
-static void I386OP(sbb_r16_rm16)(i386_state *cpustate)      // Opcode 0x1b
+static void __FASTCALL I386OP(sbb_r16_rm16)(i386_state *cpustate)      // Opcode 0x1b
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2111,7 +2111,7 @@ static void I386OP(sbb_r16_rm16)(i386_state *cpustate)      // Opcode 0x1b
        }
 }
 
-static void I386OP(sbb_ax_i16)(i386_state *cpustate)        // Opcode 0x1d
+static void __FASTCALL I386OP(sbb_ax_i16)(i386_state *cpustate)        // Opcode 0x1d
 {
        UINT16 src, dst;
        src = FETCH16(cpustate);
@@ -2121,7 +2121,7 @@ static void I386OP(sbb_ax_i16)(i386_state *cpustate)        // Opcode 0x1d
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(scasw)(i386_state *cpustate)             // Opcode 0xaf
+static void __FASTCALL I386OP(scasw)(i386_state *cpustate)             // Opcode 0xaf
 {
        UINT32 eas;
        UINT16 src, dst;
@@ -2133,7 +2133,7 @@ static void I386OP(scasw)(i386_state *cpustate)             // Opcode 0xaf
        CYCLES(cpustate,CYCLES_SCAS);
 }
 
-static void I386OP(shld16_i8)(i386_state *cpustate)         // Opcode 0x0f a4
+static void __FASTCALL I386OP(shld16_i8)(i386_state *cpustate)         // Opcode 0x0f a4
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2179,7 +2179,7 @@ static void I386OP(shld16_i8)(i386_state *cpustate)         // Opcode 0x0f a4
        }
 }
 
-static void I386OP(shld16_cl)(i386_state *cpustate)         // Opcode 0x0f a5
+static void __FASTCALL I386OP(shld16_cl)(i386_state *cpustate)         // Opcode 0x0f a5
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2224,7 +2224,7 @@ static void I386OP(shld16_cl)(i386_state *cpustate)         // Opcode 0x0f a5
        }
 }
 
-static void I386OP(shrd16_i8)(i386_state *cpustate)         // Opcode 0x0f ac
+static void __FASTCALL I386OP(shrd16_i8)(i386_state *cpustate)         // Opcode 0x0f ac
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2269,7 +2269,7 @@ static void I386OP(shrd16_i8)(i386_state *cpustate)         // Opcode 0x0f ac
        }
 }
 
-static void I386OP(shrd16_cl)(i386_state *cpustate)         // Opcode 0x0f ad
+static void __FASTCALL I386OP(shrd16_cl)(i386_state *cpustate)         // Opcode 0x0f ad
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2314,7 +2314,7 @@ static void I386OP(shrd16_cl)(i386_state *cpustate)         // Opcode 0x0f ad
        }
 }
 
-static void I386OP(stosw)(i386_state *cpustate)             // Opcode 0xab
+static void __FASTCALL I386OP(stosw)(i386_state *cpustate)             // Opcode 0xab
 {
        UINT32 ead;
        ead = i386_translate(cpustate, ES, cpustate->address_size ? REG32(EDI) : REG16(DI), 1, 2 );
@@ -2323,7 +2323,7 @@ static void I386OP(stosw)(i386_state *cpustate)             // Opcode 0xab
        CYCLES(cpustate,CYCLES_STOS);
 }
 
-static void I386OP(sub_rm16_r16)(i386_state *cpustate)      // Opcode 0x29
+static void __FASTCALL I386OP(sub_rm16_r16)(i386_state *cpustate)      // Opcode 0x29
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2343,7 +2343,7 @@ static void I386OP(sub_rm16_r16)(i386_state *cpustate)      // Opcode 0x29
        }
 }
 
-static void I386OP(sub_r16_rm16)(i386_state *cpustate)      // Opcode 0x2b
+static void __FASTCALL I386OP(sub_r16_rm16)(i386_state *cpustate)      // Opcode 0x2b
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2363,7 +2363,7 @@ static void I386OP(sub_r16_rm16)(i386_state *cpustate)      // Opcode 0x2b
        }
 }
 
-static void I386OP(sub_ax_i16)(i386_state *cpustate)        // Opcode 0x2d
+static void __FASTCALL I386OP(sub_ax_i16)(i386_state *cpustate)        // Opcode 0x2d
 {
        UINT16 src, dst;
        src = FETCH16(cpustate);
@@ -2373,7 +2373,7 @@ static void I386OP(sub_ax_i16)(i386_state *cpustate)        // Opcode 0x2d
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(test_ax_i16)(i386_state *cpustate)       // Opcode 0xa9
+static void __FASTCALL I386OP(test_ax_i16)(i386_state *cpustate)       // Opcode 0xa9
 {
        UINT16 src = FETCH16(cpustate);
        UINT16 dst = REG16(AX);
@@ -2384,7 +2384,7 @@ static void I386OP(test_ax_i16)(i386_state *cpustate)       // Opcode 0xa9
        CYCLES(cpustate,CYCLES_TEST_IMM_ACC);
 }
 
-static void I386OP(test_rm16_r16)(i386_state *cpustate)     // Opcode 0x85
+static void __FASTCALL I386OP(test_rm16_r16)(i386_state *cpustate)     // Opcode 0x85
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2408,7 +2408,7 @@ static void I386OP(test_rm16_r16)(i386_state *cpustate)     // Opcode 0x85
        }
 }
 
-static void I386OP(xchg_ax_cx)(i386_state *cpustate)        // Opcode 0x91
+static void __FASTCALL I386OP(xchg_ax_cx)(i386_state *cpustate)        // Opcode 0x91
 {
        UINT16 temp;
        temp = REG16(AX);
@@ -2417,7 +2417,7 @@ static void I386OP(xchg_ax_cx)(i386_state *cpustate)        // Opcode 0x91
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_ax_dx)(i386_state *cpustate)        // Opcode 0x92
+static void __FASTCALL I386OP(xchg_ax_dx)(i386_state *cpustate)        // Opcode 0x92
 {
        UINT16 temp;
        temp = REG16(AX);
@@ -2426,7 +2426,7 @@ static void I386OP(xchg_ax_dx)(i386_state *cpustate)        // Opcode 0x92
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_ax_bx)(i386_state *cpustate)        // Opcode 0x93
+static void __FASTCALL I386OP(xchg_ax_bx)(i386_state *cpustate)        // Opcode 0x93
 {
        UINT16 temp;
        temp = REG16(AX);
@@ -2435,7 +2435,7 @@ static void I386OP(xchg_ax_bx)(i386_state *cpustate)        // Opcode 0x93
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_ax_sp)(i386_state *cpustate)        // Opcode 0x94
+static void __FASTCALL I386OP(xchg_ax_sp)(i386_state *cpustate)        // Opcode 0x94
 {
        UINT16 temp;
        temp = REG16(AX);
@@ -2444,7 +2444,7 @@ static void I386OP(xchg_ax_sp)(i386_state *cpustate)        // Opcode 0x94
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_ax_bp)(i386_state *cpustate)        // Opcode 0x95
+static void __FASTCALL I386OP(xchg_ax_bp)(i386_state *cpustate)        // Opcode 0x95
 {
        UINT16 temp;
        temp = REG16(AX);
@@ -2453,7 +2453,7 @@ static void I386OP(xchg_ax_bp)(i386_state *cpustate)        // Opcode 0x95
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_ax_si)(i386_state *cpustate)        // Opcode 0x96
+static void __FASTCALL I386OP(xchg_ax_si)(i386_state *cpustate)        // Opcode 0x96
 {
        UINT16 temp;
        temp = REG16(AX);
@@ -2462,7 +2462,7 @@ static void I386OP(xchg_ax_si)(i386_state *cpustate)        // Opcode 0x96
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_ax_di)(i386_state *cpustate)        // Opcode 0x97
+static void __FASTCALL I386OP(xchg_ax_di)(i386_state *cpustate)        // Opcode 0x97
 {
        UINT16 temp;
        temp = REG16(AX);
@@ -2471,7 +2471,7 @@ static void I386OP(xchg_ax_di)(i386_state *cpustate)        // Opcode 0x97
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_r16_rm16)(i386_state *cpustate)     // Opcode 0x87
+static void __FASTCALL I386OP(xchg_r16_rm16)(i386_state *cpustate)     // Opcode 0x87
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2490,7 +2490,7 @@ static void I386OP(xchg_r16_rm16)(i386_state *cpustate)     // Opcode 0x87
        }
 }
 
-static void I386OP(xor_rm16_r16)(i386_state *cpustate)      // Opcode 0x31
+static void __FASTCALL I386OP(xor_rm16_r16)(i386_state *cpustate)      // Opcode 0x31
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2510,7 +2510,7 @@ static void I386OP(xor_rm16_r16)(i386_state *cpustate)      // Opcode 0x31
        }
 }
 
-static void I386OP(xor_r16_rm16)(i386_state *cpustate)      // Opcode 0x33
+static void __FASTCALL I386OP(xor_r16_rm16)(i386_state *cpustate)      // Opcode 0x33
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2530,7 +2530,7 @@ static void I386OP(xor_r16_rm16)(i386_state *cpustate)      // Opcode 0x33
        }
 }
 
-static void I386OP(xor_ax_i16)(i386_state *cpustate)        // Opcode 0x35
+static void __FASTCALL I386OP(xor_ax_i16)(i386_state *cpustate)        // Opcode 0x35
 {
        UINT16 src, dst;
        src = FETCH16(cpustate);
@@ -2542,7 +2542,7 @@ static void I386OP(xor_ax_i16)(i386_state *cpustate)        // Opcode 0x35
 
 
 
-static void I386OP(group81_16)(i386_state *cpustate)        // Opcode 0x81
+static void __FASTCALL I386OP(group81_16)(i386_state *cpustate)        // Opcode 0x81
 {
        UINT32 ea;
        UINT16 src, dst;
@@ -2679,7 +2679,7 @@ static void I386OP(group81_16)(i386_state *cpustate)        // Opcode 0x81
        }
 }
 
-static void I386OP(group83_16)(i386_state *cpustate)        // Opcode 0x83
+static void __FASTCALL I386OP(group83_16)(i386_state *cpustate)        // Opcode 0x83
 {
        UINT32 ea;
        UINT16 src, dst;
@@ -2816,7 +2816,7 @@ static void I386OP(group83_16)(i386_state *cpustate)        // Opcode 0x83
        }
 }
 
-static void I386OP(groupC1_16)(i386_state *cpustate)        // Opcode 0xc1
+static void __FASTCALL I386OP(groupC1_16)(i386_state *cpustate)        // Opcode 0xc1
 {
        UINT16 dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2836,7 +2836,7 @@ static void I386OP(groupC1_16)(i386_state *cpustate)        // Opcode 0xc1
        }
 }
 
-static void I386OP(groupD1_16)(i386_state *cpustate)        // Opcode 0xd1
+static void __FASTCALL I386OP(groupD1_16)(i386_state *cpustate)        // Opcode 0xd1
 {
        UINT16 dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2853,7 +2853,7 @@ static void I386OP(groupD1_16)(i386_state *cpustate)        // Opcode 0xd1
        }
 }
 
-static void I386OP(groupD3_16)(i386_state *cpustate)        // Opcode 0xd3
+static void __FASTCALL I386OP(groupD3_16)(i386_state *cpustate)        // Opcode 0xd3
 {
        UINT16 dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2870,7 +2870,7 @@ static void I386OP(groupD3_16)(i386_state *cpustate)        // Opcode 0xd3
        }
 }
 
-static void I386OP(groupF7_16)(i386_state *cpustate)        // Opcode 0xf7
+static void __FASTCALL I386OP(groupF7_16)(i386_state *cpustate)        // Opcode 0xf7
 {
        UINT8 modrm = FETCH(cpustate);
 
@@ -3034,7 +3034,7 @@ static void I386OP(groupF7_16)(i386_state *cpustate)        // Opcode 0xf7
        }
 }
 
-static void I386OP(groupFF_16)(i386_state *cpustate)        // Opcode 0xff
+static void __FASTCALL I386OP(groupFF_16)(i386_state *cpustate)        // Opcode 0xff
 {
        UINT8 modrm = FETCH(cpustate);
 
@@ -3188,7 +3188,7 @@ static void I386OP(groupFF_16)(i386_state *cpustate)        // Opcode 0xff
 extern void i386_change_protect_mode(i386_state *cpustate, int val);
 extern void i386_change_paging_mode(i386_state *cpustate, int val);
 
-static void I386OP(group0F00_16)(i386_state *cpustate)          // Opcode 0x0f 00
+static void __FASTCALL I386OP(group0F00_16)(i386_state *cpustate)          // Opcode 0x0f 00
 {
        UINT32 address, ea;
        UINT8 modrm = FETCH(cpustate);
@@ -3397,7 +3397,7 @@ static void I386OP(group0F00_16)(i386_state *cpustate)          // Opcode 0x0f 0
        }
 }
 
-static void I386OP(group0F01_16)(i386_state *cpustate)      // Opcode 0x0f 01
+static void __FASTCALL I386OP(group0F01_16)(i386_state *cpustate)      // Opcode 0x0f 01
 {
        UINT8 modrm = FETCH(cpustate);
        UINT16 address;
@@ -3511,7 +3511,7 @@ static void I386OP(group0F01_16)(i386_state *cpustate)      // Opcode 0x0f 01
        }
 }
 
-static void I386OP(group0FBA_16)(i386_state *cpustate)      // Opcode 0x0f ba
+static void __FASTCALL I386OP(group0FBA_16)(i386_state *cpustate)      // Opcode 0x0f ba
 {
        UINT8 modrm = FETCH(cpustate);
 
@@ -3631,7 +3631,7 @@ static void I386OP(group0FBA_16)(i386_state *cpustate)      // Opcode 0x0f ba
        }
 }
 
-static void I386OP(lar_r16_rm16)(i386_state *cpustate)  // Opcode 0x0f 0x02
+static void __FASTCALL I386OP(lar_r16_rm16)(i386_state *cpustate)  // Opcode 0x0f 0x02
 {
        UINT8 modrm = FETCH(cpustate);
        I386_SREG seg;
@@ -3698,7 +3698,7 @@ static void I386OP(lar_r16_rm16)(i386_state *cpustate)  // Opcode 0x0f 0x02
        }
 }
 
-static void I386OP(lsl_r16_rm16)(i386_state *cpustate)  // Opcode 0x0f 0x03
+static void __FASTCALL I386OP(lsl_r16_rm16)(i386_state *cpustate)  // Opcode 0x0f 0x03
 {
        UINT8 modrm = FETCH(cpustate);
        UINT32 limit;
@@ -3763,7 +3763,7 @@ static void I386OP(lsl_r16_rm16)(i386_state *cpustate)  // Opcode 0x0f 0x03
        }
 }
 
-static void I386OP(bound_r16_m16_m16)(i386_state *cpustate) // Opcode 0x62
+static void __FASTCALL I386OP(bound_r16_m16_m16)(i386_state *cpustate) // Opcode 0x62
 {
        UINT8 modrm;
        INT16 val, low, high;
@@ -3793,7 +3793,7 @@ static void I386OP(bound_r16_m16_m16)(i386_state *cpustate) // Opcode 0x62
        }
 }
 
-static void I386OP(retf16)(i386_state *cpustate)            // Opcode 0xcb
+static void __FASTCALL I386OP(retf16)(i386_state *cpustate)            // Opcode 0xcb
 {
        if(PROTECTED_MODE && !V8086_MODE)
        {
@@ -3810,7 +3810,7 @@ static void I386OP(retf16)(i386_state *cpustate)            // Opcode 0xcb
        CYCLES(cpustate,CYCLES_RET_INTERSEG);
 }
 
-static void I386OP(retf_i16)(i386_state *cpustate)          // Opcode 0xca
+static void __FASTCALL I386OP(retf_i16)(i386_state *cpustate)          // Opcode 0xca
 {
        UINT16 count = FETCH16(cpustate);
 
@@ -3848,31 +3848,31 @@ static bool I386OP(load_far_pointer16)(i386_state *cpustate, int s)
        return true;
 }
 
-static void I386OP(lds16)(i386_state *cpustate)             // Opcode 0xc5
+static void __FASTCALL I386OP(lds16)(i386_state *cpustate)             // Opcode 0xc5
 {
        if(I386OP(load_far_pointer16)(cpustate, DS))
                CYCLES(cpustate,CYCLES_LDS);
 }
 
-static void I386OP(lss16)(i386_state *cpustate)             // Opcode 0x0f 0xb2
+static void __FASTCALL I386OP(lss16)(i386_state *cpustate)             // Opcode 0x0f 0xb2
 {
        if(I386OP(load_far_pointer16)(cpustate, SS))
                CYCLES(cpustate,CYCLES_LSS);
 }
 
-static void I386OP(les16)(i386_state *cpustate)             // Opcode 0xc4
+static void __FASTCALL I386OP(les16)(i386_state *cpustate)             // Opcode 0xc4
 {
        if(I386OP(load_far_pointer16)(cpustate, ES))
                CYCLES(cpustate,CYCLES_LES);
 }
 
-static void I386OP(lfs16)(i386_state *cpustate)             // Opcode 0x0f 0xb4
+static void __FASTCALL I386OP(lfs16)(i386_state *cpustate)             // Opcode 0x0f 0xb4
 {
        if(I386OP(load_far_pointer16)(cpustate, FS))
                CYCLES(cpustate,CYCLES_LFS);
 }
 
-static void I386OP(lgs16)(i386_state *cpustate)             // Opcode 0x0f 0xb5
+static void __FASTCALL I386OP(lgs16)(i386_state *cpustate)             // Opcode 0x0f 0xb5
 {
        if(I386OP(load_far_pointer16)(cpustate, GS))
                CYCLES(cpustate,CYCLES_LGS);
index 1910af8..2e4cd99 100644 (file)
@@ -1,6 +1,6 @@
 // license:BSD-3-Clause
 // copyright-holders:Ville Linde, Barry Rodewald, Carl, Philip Bennett
-static UINT32 I386OP(shift_rotate32)(i386_state *cpustate, UINT8 modrm, UINT32 value, UINT8 shift)
+static UINT32 __FASTCALL I386OP(shift_rotate32)(i386_state *cpustate, UINT8 modrm, UINT32 value, UINT8 shift)
 {
        UINT32 dst, src;
        dst = value;
@@ -120,7 +120,7 @@ static UINT32 I386OP(shift_rotate32)(i386_state *cpustate, UINT8 modrm, UINT32 v
 
 
 
-static void I386OP(adc_rm32_r32)(i386_state *cpustate)      // Opcode 0x11
+static void __FASTCALL I386OP(adc_rm32_r32)(i386_state *cpustate)      // Opcode 0x11
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -140,7 +140,7 @@ static void I386OP(adc_rm32_r32)(i386_state *cpustate)      // Opcode 0x11
        }
 }
 
-static void I386OP(adc_r32_rm32)(i386_state *cpustate)      // Opcode 0x13
+static void __FASTCALL I386OP(adc_r32_rm32)(i386_state *cpustate)      // Opcode 0x13
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -160,7 +160,7 @@ static void I386OP(adc_r32_rm32)(i386_state *cpustate)      // Opcode 0x13
        }
 }
 
-static void I386OP(adc_eax_i32)(i386_state *cpustate)       // Opcode 0x15
+static void __FASTCALL I386OP(adc_eax_i32)(i386_state *cpustate)       // Opcode 0x15
 {
        UINT32 src, dst;
        src = FETCH32(cpustate);
@@ -170,7 +170,7 @@ static void I386OP(adc_eax_i32)(i386_state *cpustate)       // Opcode 0x15
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(add_rm32_r32)(i386_state *cpustate)      // Opcode 0x01
+static void __FASTCALL I386OP(add_rm32_r32)(i386_state *cpustate)      // Opcode 0x01
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -190,7 +190,7 @@ static void I386OP(add_rm32_r32)(i386_state *cpustate)      // Opcode 0x01
        }
 }
 
-static void I386OP(add_r32_rm32)(i386_state *cpustate)      // Opcode 0x03
+static void __FASTCALL I386OP(add_r32_rm32)(i386_state *cpustate)      // Opcode 0x03
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -210,7 +210,7 @@ static void I386OP(add_r32_rm32)(i386_state *cpustate)      // Opcode 0x03
        }
 }
 
-static void I386OP(add_eax_i32)(i386_state *cpustate)       // Opcode 0x05
+static void __FASTCALL I386OP(add_eax_i32)(i386_state *cpustate)       // Opcode 0x05
 {
        UINT32 src, dst;
        src = FETCH32(cpustate);
@@ -220,7 +220,7 @@ static void I386OP(add_eax_i32)(i386_state *cpustate)       // Opcode 0x05
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(and_rm32_r32)(i386_state *cpustate)      // Opcode 0x21
+static void __FASTCALL I386OP(and_rm32_r32)(i386_state *cpustate)      // Opcode 0x21
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -240,7 +240,7 @@ static void I386OP(and_rm32_r32)(i386_state *cpustate)      // Opcode 0x21
        }
 }
 
-static void I386OP(and_r32_rm32)(i386_state *cpustate)      // Opcode 0x23
+static void __FASTCALL I386OP(and_r32_rm32)(i386_state *cpustate)      // Opcode 0x23
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -260,7 +260,7 @@ static void I386OP(and_r32_rm32)(i386_state *cpustate)      // Opcode 0x23
        }
 }
 
-static void I386OP(and_eax_i32)(i386_state *cpustate)       // Opcode 0x25
+static void __FASTCALL I386OP(and_eax_i32)(i386_state *cpustate)       // Opcode 0x25
 {
        UINT32 src, dst;
        src = FETCH32(cpustate);
@@ -270,7 +270,7 @@ static void I386OP(and_eax_i32)(i386_state *cpustate)       // Opcode 0x25
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(bsf_r32_rm32)(i386_state *cpustate)      // Opcode 0x0f bc
+static void __FASTCALL I386OP(bsf_r32_rm32)(i386_state *cpustate)      // Opcode 0x0f bc
 {
        UINT32 src, dst, temp;
        UINT8 modrm = FETCH(cpustate);
@@ -299,7 +299,7 @@ static void I386OP(bsf_r32_rm32)(i386_state *cpustate)      // Opcode 0x0f bc
        CYCLES(cpustate,CYCLES_BSF_BASE);
 }
 
-static void I386OP(bsr_r32_rm32)(i386_state *cpustate)      // Opcode 0x0f bd
+static void __FASTCALL I386OP(bsr_r32_rm32)(i386_state *cpustate)      // Opcode 0x0f bd
 {
        UINT32 src, dst, temp;
        UINT8 modrm = FETCH(cpustate);
@@ -328,7 +328,7 @@ static void I386OP(bsr_r32_rm32)(i386_state *cpustate)      // Opcode 0x0f bd
        CYCLES(cpustate,CYCLES_BSR_BASE);
 }
 
-static void I386OP(bt_rm32_r32)(i386_state *cpustate)       // Opcode 0x0f a3
+static void __FASTCALL I386OP(bt_rm32_r32)(i386_state *cpustate)       // Opcode 0x0f a3
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -359,7 +359,7 @@ static void I386OP(bt_rm32_r32)(i386_state *cpustate)       // Opcode 0x0f a3
        }
 }
 
-static void I386OP(btc_rm32_r32)(i386_state *cpustate)      // Opcode 0x0f bb
+static void __FASTCALL I386OP(btc_rm32_r32)(i386_state *cpustate)      // Opcode 0x0f bb
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -394,7 +394,7 @@ static void I386OP(btc_rm32_r32)(i386_state *cpustate)      // Opcode 0x0f bb
        }
 }
 
-static void I386OP(btr_rm32_r32)(i386_state *cpustate)      // Opcode 0x0f b3
+static void __FASTCALL I386OP(btr_rm32_r32)(i386_state *cpustate)      // Opcode 0x0f b3
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -429,7 +429,7 @@ static void I386OP(btr_rm32_r32)(i386_state *cpustate)      // Opcode 0x0f b3
        }
 }
 
-static void I386OP(bts_rm32_r32)(i386_state *cpustate)      // Opcode 0x0f ab
+static void __FASTCALL I386OP(bts_rm32_r32)(i386_state *cpustate)      // Opcode 0x0f ab
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -464,7 +464,7 @@ static void I386OP(bts_rm32_r32)(i386_state *cpustate)      // Opcode 0x0f ab
        }
 }
 
-static void I386OP(call_abs32)(i386_state *cpustate)        // Opcode 0x9a
+static void __FASTCALL I386OP(call_abs32)(i386_state *cpustate)        // Opcode 0x9a
 {
        UINT32 offset = FETCH32(cpustate);
        UINT16 ptr = FETCH16(cpustate);
@@ -494,7 +494,7 @@ static void I386OP(call_abs32)(i386_state *cpustate)        // Opcode 0x9a
        CHANGE_PC(cpustate,cpustate->eip);
 }
 
-static void I386OP(call_rel32)(i386_state *cpustate)        // Opcode 0xe8
+static void __FASTCALL I386OP(call_rel32)(i386_state *cpustate)        // Opcode 0xe8
 {
        INT32 disp = FETCH32(cpustate);
        PUSH32(cpustate, cpustate->eip );
@@ -503,7 +503,7 @@ static void I386OP(call_rel32)(i386_state *cpustate)        // Opcode 0xe8
        CYCLES(cpustate,CYCLES_CALL);       /* TODO: Timing = 7 + m */
 }
 
-static void I386OP(cdq)(i386_state *cpustate)               // Opcode 0x99
+static void __FASTCALL I386OP(cdq)(i386_state *cpustate)               // Opcode 0x99
 {
        if( REG32(EAX) & 0x80000000 ) {
                REG32(EDX) = 0xffffffff;
@@ -513,7 +513,7 @@ static void I386OP(cdq)(i386_state *cpustate)               // Opcode 0x99
        CYCLES(cpustate,CYCLES_CWD);
 }
 
-static void I386OP(cmp_rm32_r32)(i386_state *cpustate)      // Opcode 0x39
+static void __FASTCALL I386OP(cmp_rm32_r32)(i386_state *cpustate)      // Opcode 0x39
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -531,7 +531,7 @@ static void I386OP(cmp_rm32_r32)(i386_state *cpustate)      // Opcode 0x39
        }
 }
 
-static void I386OP(cmp_r32_rm32)(i386_state *cpustate)      // Opcode 0x3b
+static void __FASTCALL I386OP(cmp_r32_rm32)(i386_state *cpustate)      // Opcode 0x3b
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -549,7 +549,7 @@ static void I386OP(cmp_r32_rm32)(i386_state *cpustate)      // Opcode 0x3b
        }
 }
 
-static void I386OP(cmp_eax_i32)(i386_state *cpustate)       // Opcode 0x3d
+static void __FASTCALL I386OP(cmp_eax_i32)(i386_state *cpustate)       // Opcode 0x3d
 {
        UINT32 src, dst;
        src = FETCH32(cpustate);
@@ -558,7 +558,7 @@ static void I386OP(cmp_eax_i32)(i386_state *cpustate)       // Opcode 0x3d
        CYCLES(cpustate,CYCLES_CMP_IMM_ACC);
 }
 
-static void I386OP(cmpsd)(i386_state *cpustate)             // Opcode 0xa7
+static void __FASTCALL I386OP(cmpsd)(i386_state *cpustate)             // Opcode 0xa7
 {
        UINT32 eas, ead, src, dst;
        if( cpustate->segment_prefix ) {
@@ -575,61 +575,61 @@ static void I386OP(cmpsd)(i386_state *cpustate)             // Opcode 0xa7
        CYCLES(cpustate,CYCLES_CMPS);
 }
 
-static void I386OP(cwde)(i386_state *cpustate)              // Opcode 0x98
+static void __FASTCALL I386OP(cwde)(i386_state *cpustate)              // Opcode 0x98
 {
        REG32(EAX) = (INT32)((INT16)REG16(AX));
        CYCLES(cpustate,CYCLES_CBW);
 }
 
-static void I386OP(dec_eax)(i386_state *cpustate)           // Opcode 0x48
+static void __FASTCALL I386OP(dec_eax)(i386_state *cpustate)           // Opcode 0x48
 {
        REG32(EAX) = DEC32(cpustate, REG32(EAX) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_ecx)(i386_state *cpustate)           // Opcode 0x49
+static void __FASTCALL I386OP(dec_ecx)(i386_state *cpustate)           // Opcode 0x49
 {
        REG32(ECX) = DEC32(cpustate, REG32(ECX) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_edx)(i386_state *cpustate)           // Opcode 0x4a
+static void __FASTCALL I386OP(dec_edx)(i386_state *cpustate)           // Opcode 0x4a
 {
        REG32(EDX) = DEC32(cpustate, REG32(EDX) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_ebx)(i386_state *cpustate)           // Opcode 0x4b
+static void __FASTCALL I386OP(dec_ebx)(i386_state *cpustate)           // Opcode 0x4b
 {
        REG32(EBX) = DEC32(cpustate, REG32(EBX) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_esp)(i386_state *cpustate)           // Opcode 0x4c
+static void __FASTCALL I386OP(dec_esp)(i386_state *cpustate)           // Opcode 0x4c
 {
        REG32(ESP) = DEC32(cpustate, REG32(ESP) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_ebp)(i386_state *cpustate)           // Opcode 0x4d
+static void __FASTCALL I386OP(dec_ebp)(i386_state *cpustate)           // Opcode 0x4d
 {
        REG32(EBP) = DEC32(cpustate, REG32(EBP) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_esi)(i386_state *cpustate)           // Opcode 0x4e
+static void __FASTCALL I386OP(dec_esi)(i386_state *cpustate)           // Opcode 0x4e
 {
        REG32(ESI) = DEC32(cpustate, REG32(ESI) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(dec_edi)(i386_state *cpustate)           // Opcode 0x4f
+static void __FASTCALL I386OP(dec_edi)(i386_state *cpustate)           // Opcode 0x4f
 {
        REG32(EDI) = DEC32(cpustate, REG32(EDI) );
        CYCLES(cpustate,CYCLES_DEC_REG);
 }
 
-static void I386OP(imul_r32_rm32)(i386_state *cpustate)     // Opcode 0x0f af
+static void __FASTCALL I386OP(imul_r32_rm32)(i386_state *cpustate)     // Opcode 0x0f af
 {
        UINT8 modrm = FETCH(cpustate);
        INT64 result;
@@ -651,7 +651,7 @@ static void I386OP(imul_r32_rm32)(i386_state *cpustate)     // Opcode 0x0f af
        cpustate->CF = cpustate->OF = !(result == (INT64)(INT32)result);
 }
 
-static void I386OP(imul_r32_rm32_i32)(i386_state *cpustate) // Opcode 0x69
+static void __FASTCALL I386OP(imul_r32_rm32_i32)(i386_state *cpustate) // Opcode 0x69
 {
        UINT8 modrm = FETCH(cpustate);
        INT64 result;
@@ -673,7 +673,7 @@ static void I386OP(imul_r32_rm32_i32)(i386_state *cpustate) // Opcode 0x69
        cpustate->CF = cpustate->OF = !(result == (INT64)(INT32)result);
 }
 
-static void I386OP(imul_r32_rm32_i8)(i386_state *cpustate)  // Opcode 0x6b
+static void __FASTCALL I386OP(imul_r32_rm32_i8)(i386_state *cpustate)  // Opcode 0x6b
 {
        UINT8 modrm = FETCH(cpustate);
        INT64 result;
@@ -695,7 +695,7 @@ static void I386OP(imul_r32_rm32_i8)(i386_state *cpustate)  // Opcode 0x6b
        cpustate->CF = cpustate->OF = !(result == (INT64)(INT32)result);
 }
 
-static void I386OP(in_eax_i8)(i386_state *cpustate)         // Opcode 0xe5
+static void __FASTCALL I386OP(in_eax_i8)(i386_state *cpustate)         // Opcode 0xe5
 {
        UINT16 port = FETCH(cpustate);
        UINT32 data = READPORT32(cpustate, port);
@@ -703,7 +703,7 @@ static void I386OP(in_eax_i8)(i386_state *cpustate)         // Opcode 0xe5
        CYCLES(cpustate,CYCLES_IN_VAR);
 }
 
-static void I386OP(in_eax_dx)(i386_state *cpustate)         // Opcode 0xed
+static void __FASTCALL I386OP(in_eax_dx)(i386_state *cpustate)         // Opcode 0xed
 {
        UINT16 port = REG16(DX);
        UINT32 data = READPORT32(cpustate, port);
@@ -711,55 +711,55 @@ static void I386OP(in_eax_dx)(i386_state *cpustate)         // Opcode 0xed
        CYCLES(cpustate,CYCLES_IN);
 }
 
-static void I386OP(inc_eax)(i386_state *cpustate)           // Opcode 0x40
+static void __FASTCALL I386OP(inc_eax)(i386_state *cpustate)           // Opcode 0x40
 {
        REG32(EAX) = INC32(cpustate, REG32(EAX) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_ecx)(i386_state *cpustate)           // Opcode 0x41
+static void __FASTCALL I386OP(inc_ecx)(i386_state *cpustate)           // Opcode 0x41
 {
        REG32(ECX) = INC32(cpustate, REG32(ECX) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_edx)(i386_state *cpustate)           // Opcode 0x42
+static void __FASTCALL I386OP(inc_edx)(i386_state *cpustate)           // Opcode 0x42
 {
        REG32(EDX) = INC32(cpustate, REG32(EDX) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_ebx)(i386_state *cpustate)           // Opcode 0x43
+static void __FASTCALL I386OP(inc_ebx)(i386_state *cpustate)           // Opcode 0x43
 {
        REG32(EBX) = INC32(cpustate, REG32(EBX) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_esp)(i386_state *cpustate)           // Opcode 0x44
+static void __FASTCALL I386OP(inc_esp)(i386_state *cpustate)           // Opcode 0x44
 {
        REG32(ESP) = INC32(cpustate, REG32(ESP) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_ebp)(i386_state *cpustate)           // Opcode 0x45
+static void __FASTCALL I386OP(inc_ebp)(i386_state *cpustate)           // Opcode 0x45
 {
        REG32(EBP) = INC32(cpustate, REG32(EBP) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_esi)(i386_state *cpustate)           // Opcode 0x46
+static void __FASTCALL I386OP(inc_esi)(i386_state *cpustate)           // Opcode 0x46
 {
        REG32(ESI) = INC32(cpustate, REG32(ESI) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(inc_edi)(i386_state *cpustate)           // Opcode 0x47
+static void __FASTCALL I386OP(inc_edi)(i386_state *cpustate)           // Opcode 0x47
 {
        REG32(EDI) = INC32(cpustate, REG32(EDI) );
        CYCLES(cpustate,CYCLES_INC_REG);
 }
 
-static void I386OP(iret32)(i386_state *cpustate)            // Opcode 0xcf
+static void __FASTCALL I386OP(iret32)(i386_state *cpustate)            // Opcode 0xcf
 {
        if( PROTECTED_MODE )
        {
@@ -778,7 +778,7 @@ static void I386OP(iret32)(i386_state *cpustate)            // Opcode 0xcf
        CYCLES(cpustate,CYCLES_IRET);
 }
 
-static void I386OP(ja_rel32)(i386_state *cpustate)          // Opcode 0x0f 87
+static void __FASTCALL I386OP(ja_rel32)(i386_state *cpustate)          // Opcode 0x0f 87
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->CF == 0 && cpustate->ZF == 0 ) {
@@ -790,7 +790,7 @@ static void I386OP(ja_rel32)(i386_state *cpustate)          // Opcode 0x0f 87
        }
 }
 
-static void I386OP(jbe_rel32)(i386_state *cpustate)         // Opcode 0x0f 86
+static void __FASTCALL I386OP(jbe_rel32)(i386_state *cpustate)         // Opcode 0x0f 86
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->CF != 0 || cpustate->ZF != 0 ) {
@@ -802,7 +802,7 @@ static void I386OP(jbe_rel32)(i386_state *cpustate)         // Opcode 0x0f 86
        }
 }
 
-static void I386OP(jc_rel32)(i386_state *cpustate)          // Opcode 0x0f 82
+static void __FASTCALL I386OP(jc_rel32)(i386_state *cpustate)          // Opcode 0x0f 82
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->CF != 0 ) {
@@ -814,7 +814,7 @@ static void I386OP(jc_rel32)(i386_state *cpustate)          // Opcode 0x0f 82
        }
 }
 
-static void I386OP(jg_rel32)(i386_state *cpustate)          // Opcode 0x0f 8f
+static void __FASTCALL I386OP(jg_rel32)(i386_state *cpustate)          // Opcode 0x0f 8f
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->ZF == 0 && (cpustate->SF == cpustate->OF) ) {
@@ -826,7 +826,7 @@ static void I386OP(jg_rel32)(i386_state *cpustate)          // Opcode 0x0f 8f
        }
 }
 
-static void I386OP(jge_rel32)(i386_state *cpustate)         // Opcode 0x0f 8d
+static void __FASTCALL I386OP(jge_rel32)(i386_state *cpustate)         // Opcode 0x0f 8d
 {
        INT32 disp = FETCH32(cpustate);
        if(cpustate->SF == cpustate->OF) {
@@ -838,7 +838,7 @@ static void I386OP(jge_rel32)(i386_state *cpustate)         // Opcode 0x0f 8d
        }
 }
 
-static void I386OP(jl_rel32)(i386_state *cpustate)          // Opcode 0x0f 8c
+static void __FASTCALL I386OP(jl_rel32)(i386_state *cpustate)          // Opcode 0x0f 8c
 {
        INT32 disp = FETCH32(cpustate);
        if( (cpustate->SF != cpustate->OF) ) {
@@ -850,7 +850,7 @@ static void I386OP(jl_rel32)(i386_state *cpustate)          // Opcode 0x0f 8c
        }
 }
 
-static void I386OP(jle_rel32)(i386_state *cpustate)         // Opcode 0x0f 8e
+static void __FASTCALL I386OP(jle_rel32)(i386_state *cpustate)         // Opcode 0x0f 8e
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->ZF != 0 || (cpustate->SF != cpustate->OF) ) {
@@ -862,7 +862,7 @@ static void I386OP(jle_rel32)(i386_state *cpustate)         // Opcode 0x0f 8e
        }
 }
 
-static void I386OP(jnc_rel32)(i386_state *cpustate)         // Opcode 0x0f 83
+static void __FASTCALL I386OP(jnc_rel32)(i386_state *cpustate)         // Opcode 0x0f 83
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->CF == 0 ) {
@@ -874,7 +874,7 @@ static void I386OP(jnc_rel32)(i386_state *cpustate)         // Opcode 0x0f 83
        }
 }
 
-static void I386OP(jno_rel32)(i386_state *cpustate)         // Opcode 0x0f 81
+static void __FASTCALL I386OP(jno_rel32)(i386_state *cpustate)         // Opcode 0x0f 81
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->OF == 0 ) {
@@ -886,7 +886,7 @@ static void I386OP(jno_rel32)(i386_state *cpustate)         // Opcode 0x0f 81
        }
 }
 
-static void I386OP(jnp_rel32)(i386_state *cpustate)         // Opcode 0x0f 8b
+static void __FASTCALL I386OP(jnp_rel32)(i386_state *cpustate)         // Opcode 0x0f 8b
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->PF == 0 ) {
@@ -898,7 +898,7 @@ static void I386OP(jnp_rel32)(i386_state *cpustate)         // Opcode 0x0f 8b
        }
 }
 
-static void I386OP(jns_rel32)(i386_state *cpustate)         // Opcode 0x0f 89
+static void __FASTCALL I386OP(jns_rel32)(i386_state *cpustate)         // Opcode 0x0f 89
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->SF == 0 ) {
@@ -910,7 +910,7 @@ static void I386OP(jns_rel32)(i386_state *cpustate)         // Opcode 0x0f 89
        }
 }
 
-static void I386OP(jnz_rel32)(i386_state *cpustate)         // Opcode 0x0f 85
+static void __FASTCALL I386OP(jnz_rel32)(i386_state *cpustate)         // Opcode 0x0f 85
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->ZF == 0 ) {
@@ -922,7 +922,7 @@ static void I386OP(jnz_rel32)(i386_state *cpustate)         // Opcode 0x0f 85
        }
 }
 
-static void I386OP(jo_rel32)(i386_state *cpustate)          // Opcode 0x0f 80
+static void __FASTCALL I386OP(jo_rel32)(i386_state *cpustate)          // Opcode 0x0f 80
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->OF != 0 ) {
@@ -934,7 +934,7 @@ static void I386OP(jo_rel32)(i386_state *cpustate)          // Opcode 0x0f 80
        }
 }
 
-static void I386OP(jp_rel32)(i386_state *cpustate)          // Opcode 0x0f 8a
+static void __FASTCALL I386OP(jp_rel32)(i386_state *cpustate)          // Opcode 0x0f 8a
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->PF != 0 ) {
@@ -946,7 +946,7 @@ static void I386OP(jp_rel32)(i386_state *cpustate)          // Opcode 0x0f 8a
        }
 }
 
-static void I386OP(js_rel32)(i386_state *cpustate)          // Opcode 0x0f 88
+static void __FASTCALL I386OP(js_rel32)(i386_state *cpustate)          // Opcode 0x0f 88
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->SF != 0 ) {
@@ -958,7 +958,7 @@ static void I386OP(js_rel32)(i386_state *cpustate)          // Opcode 0x0f 88
        }
 }
 
-static void I386OP(jz_rel32)(i386_state *cpustate)          // Opcode 0x0f 84
+static void __FASTCALL I386OP(jz_rel32)(i386_state *cpustate)          // Opcode 0x0f 84
 {
        INT32 disp = FETCH32(cpustate);
        if( cpustate->ZF != 0 ) {
@@ -970,7 +970,7 @@ static void I386OP(jz_rel32)(i386_state *cpustate)          // Opcode 0x0f 84
        }
 }
 
-static void I386OP(jcxz32)(i386_state *cpustate)            // Opcode 0xe3
+static void __FASTCALL I386OP(jcxz32)(i386_state *cpustate)            // Opcode 0xe3
 {
        INT8 disp = FETCH(cpustate);
        int val = (cpustate->address_size)?(REG32(ECX) == 0):(REG16(CX) == 0);
@@ -983,7 +983,7 @@ static void I386OP(jcxz32)(i386_state *cpustate)            // Opcode 0xe3
        }
 }
 
-static void I386OP(jmp_rel32)(i386_state *cpustate)         // Opcode 0xe9
+static void __FASTCALL I386OP(jmp_rel32)(i386_state *cpustate)         // Opcode 0xe9
 {
        UINT32 disp = FETCH32(cpustate);
        /* TODO: Segment limit */
@@ -992,7 +992,7 @@ static void I386OP(jmp_rel32)(i386_state *cpustate)         // Opcode 0xe9
        CYCLES(cpustate,CYCLES_JMP);        /* TODO: Timing = 7 + m */
 }
 
-static void I386OP(jmp_abs32)(i386_state *cpustate)         // Opcode 0xea
+static void __FASTCALL I386OP(jmp_abs32)(i386_state *cpustate)         // Opcode 0xea
 {
        UINT32 address = FETCH32(cpustate);
        UINT16 segment = FETCH16(cpustate);
@@ -1012,7 +1012,7 @@ static void I386OP(jmp_abs32)(i386_state *cpustate)         // Opcode 0xea
        CYCLES(cpustate,CYCLES_JMP_INTERSEG);
 }
 
-static void I386OP(lea32)(i386_state *cpustate)             // Opcode 0x8d
+static void __FASTCALL I386OP(lea32)(i386_state *cpustate)             // Opcode 0x8d
 {
        UINT8 modrm = FETCH(cpustate);
        UINT32 ea = GetNonTranslatedEA(cpustate,modrm,NULL);
@@ -1024,7 +1024,7 @@ static void I386OP(lea32)(i386_state *cpustate)             // Opcode 0x8d
        CYCLES(cpustate,CYCLES_LEA);
 }
 
-static void I386OP(enter32)(i386_state *cpustate)           // Opcode 0xc8
+static void __FASTCALL I386OP(enter32)(i386_state *cpustate)           // Opcode 0xc8
 {
        UINT16 framesize = FETCH16(cpustate);
        UINT8 level = FETCH(cpustate) % 32;
@@ -1053,7 +1053,7 @@ static void I386OP(enter32)(i386_state *cpustate)           // Opcode 0xc8
        CYCLES(cpustate,CYCLES_ENTER);
 }
 
-static void I386OP(leave32)(i386_state *cpustate)           // Opcode 0xc9
+static void __FASTCALL I386OP(leave32)(i386_state *cpustate)           // Opcode 0xc9
 {
        if(!STACK_32BIT)
                REG16(SP) = REG16(BP);
@@ -1063,7 +1063,7 @@ static void I386OP(leave32)(i386_state *cpustate)           // Opcode 0xc9
        CYCLES(cpustate,CYCLES_LEAVE);
 }
 
-static void I386OP(lodsd)(i386_state *cpustate)             // Opcode 0xad
+static void __FASTCALL I386OP(lodsd)(i386_state *cpustate)             // Opcode 0xad
 {
        UINT32 eas;
        if( cpustate->segment_prefix ) {
@@ -1076,7 +1076,7 @@ static void I386OP(lodsd)(i386_state *cpustate)             // Opcode 0xad
        CYCLES(cpustate,CYCLES_LODS);
 }
 
-static void I386OP(loop32)(i386_state *cpustate)            // Opcode 0xe2
+static void __FASTCALL I386OP(loop32)(i386_state *cpustate)            // Opcode 0xe2
 {
        INT8 disp = FETCH(cpustate);
        INT32 reg = (cpustate->address_size)?--REG32(ECX):--REG16(CX);
@@ -1087,7 +1087,7 @@ static void I386OP(loop32)(i386_state *cpustate)            // Opcode 0xe2
        CYCLES(cpustate,CYCLES_LOOP);       /* TODO: Timing = 11 + m */
 }
 
-static void I386OP(loopne32)(i386_state *cpustate)          // Opcode 0xe0
+static void __FASTCALL I386OP(loopne32)(i386_state *cpustate)          // Opcode 0xe0
 {
        INT8 disp = FETCH(cpustate);
        INT32 reg = (cpustate->address_size)?--REG32(ECX):--REG16(CX);
@@ -1098,7 +1098,7 @@ static void I386OP(loopne32)(i386_state *cpustate)          // Opcode 0xe0
        CYCLES(cpustate,CYCLES_LOOPNZ);     /* TODO: Timing = 11 + m */
 }
 
-static void I386OP(loopz32)(i386_state *cpustate)           // Opcode 0xe1
+static void __FASTCALL I386OP(loopz32)(i386_state *cpustate)           // Opcode 0xe1
 {
        INT8 disp = FETCH(cpustate);
        INT32 reg = (cpustate->address_size)?--REG32(ECX):--REG16(CX);
@@ -1109,7 +1109,7 @@ static void I386OP(loopz32)(i386_state *cpustate)           // Opcode 0xe1
        CYCLES(cpustate,CYCLES_LOOPZ);      /* TODO: Timing = 11 + m */
 }
 
-static void I386OP(mov_rm32_r32)(i386_state *cpustate)      // Opcode 0x89
+static void __FASTCALL I386OP(mov_rm32_r32)(i386_state *cpustate)      // Opcode 0x89
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -1125,7 +1125,7 @@ static void I386OP(mov_rm32_r32)(i386_state *cpustate)      // Opcode 0x89
        }
 }
 
-static void I386OP(mov_r32_rm32)(i386_state *cpustate)      // Opcode 0x8b
+static void __FASTCALL I386OP(mov_r32_rm32)(i386_state *cpustate)      // Opcode 0x8b
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -1141,7 +1141,7 @@ static void I386OP(mov_r32_rm32)(i386_state *cpustate)      // Opcode 0x8b
        }
 }
 
-static void I386OP(mov_rm32_i32)(i386_state *cpustate)      // Opcode 0xc7
+static void __FASTCALL I386OP(mov_rm32_i32)(i386_state *cpustate)      // Opcode 0xc7
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -1156,7 +1156,7 @@ static void I386OP(mov_rm32_i32)(i386_state *cpustate)      // Opcode 0xc7
        }
 }
 
-static void I386OP(mov_eax_m32)(i386_state *cpustate)       // Opcode 0xa1
+static void __FASTCALL I386OP(mov_eax_m32)(i386_state *cpustate)       // Opcode 0xa1
 {
        UINT32 offset, ea;
        if( cpustate->address_size ) {
@@ -1173,7 +1173,7 @@ static void I386OP(mov_eax_m32)(i386_state *cpustate)       // Opcode 0xa1
        CYCLES(cpustate,CYCLES_MOV_MEM_ACC);
 }
 
-static void I386OP(mov_m32_eax)(i386_state *cpustate)       // Opcode 0xa3
+static void __FASTCALL I386OP(mov_m32_eax)(i386_state *cpustate)       // Opcode 0xa3
 {
        UINT32 offset, ea;
        if( cpustate->address_size ) {
@@ -1190,55 +1190,55 @@ static void I386OP(mov_m32_eax)(i386_state *cpustate)       // Opcode 0xa3
        CYCLES(cpustate,CYCLES_MOV_ACC_MEM);
 }
 
-static void I386OP(mov_eax_i32)(i386_state *cpustate)       // Opcode 0xb8
+static void __FASTCALL I386OP(mov_eax_i32)(i386_state *cpustate)       // Opcode 0xb8
 {
        REG32(EAX) = FETCH32(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_ecx_i32)(i386_state *cpustate)       // Opcode 0xb9
+static void __FASTCALL I386OP(mov_ecx_i32)(i386_state *cpustate)       // Opcode 0xb9
 {
        REG32(ECX) = FETCH32(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_edx_i32)(i386_state *cpustate)       // Opcode 0xba
+static void __FASTCALL I386OP(mov_edx_i32)(i386_state *cpustate)       // Opcode 0xba
 {
        REG32(EDX) = FETCH32(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_ebx_i32)(i386_state *cpustate)       // Opcode 0xbb
+static void __FASTCALL I386OP(mov_ebx_i32)(i386_state *cpustate)       // Opcode 0xbb
 {
        REG32(EBX) = FETCH32(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_esp_i32)(i386_state *cpustate)       // Opcode 0xbc
+static void __FASTCALL I386OP(mov_esp_i32)(i386_state *cpustate)       // Opcode 0xbc
 {
        REG32(ESP) = FETCH32(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_ebp_i32)(i386_state *cpustate)       // Opcode 0xbd
+static void __FASTCALL I386OP(mov_ebp_i32)(i386_state *cpustate)       // Opcode 0xbd
 {
        REG32(EBP) = FETCH32(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_esi_i32)(i386_state *cpustate)       // Opcode 0xbe
+static void __FASTCALL I386OP(mov_esi_i32)(i386_state *cpustate)       // Opcode 0xbe
 {
        REG32(ESI) = FETCH32(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_edi_i32)(i386_state *cpustate)       // Opcode 0xbf
+static void __FASTCALL I386OP(mov_edi_i32)(i386_state *cpustate)       // Opcode 0xbf
 {
        REG32(EDI) = FETCH32(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(movsd)(i386_state *cpustate)             // Opcode 0xa5
+static void __FASTCALL I386OP(movsd)(i386_state *cpustate)             // Opcode 0xa5
 {
        UINT32 eas, ead, v;
        if( cpustate->segment_prefix ) {
@@ -1254,7 +1254,7 @@ static void I386OP(movsd)(i386_state *cpustate)             // Opcode 0xa5
        CYCLES(cpustate,CYCLES_MOVS);
 }
 
-static void I386OP(movsx_r32_rm8)(i386_state *cpustate)     // Opcode 0x0f be
+static void __FASTCALL I386OP(movsx_r32_rm8)(i386_state *cpustate)     // Opcode 0x0f be
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -1269,7 +1269,7 @@ static void I386OP(movsx_r32_rm8)(i386_state *cpustate)     // Opcode 0x0f be
        }
 }
 
-static void I386OP(movsx_r32_rm16)(i386_state *cpustate)    // Opcode 0x0f bf
+static void __FASTCALL I386OP(movsx_r32_rm16)(i386_state *cpustate)    // Opcode 0x0f bf
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -1284,7 +1284,7 @@ static void I386OP(movsx_r32_rm16)(i386_state *cpustate)    // Opcode 0x0f bf
        }
 }
 
-static void I386OP(movzx_r32_rm8)(i386_state *cpustate)     // Opcode 0x0f b6
+static void __FASTCALL I386OP(movzx_r32_rm8)(i386_state *cpustate)     // Opcode 0x0f b6
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -1299,7 +1299,7 @@ static void I386OP(movzx_r32_rm8)(i386_state *cpustate)     // Opcode 0x0f b6
        }
 }
 
-static void I386OP(movzx_r32_rm16)(i386_state *cpustate)    // Opcode 0x0f b7
+static void __FASTCALL I386OP(movzx_r32_rm16)(i386_state *cpustate)    // Opcode 0x0f b7
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -1314,7 +1314,7 @@ static void I386OP(movzx_r32_rm16)(i386_state *cpustate)    // Opcode 0x0f b7
        }
 }
 
-static void I386OP(or_rm32_r32)(i386_state *cpustate)       // Opcode 0x09
+static void __FASTCALL I386OP(or_rm32_r32)(i386_state *cpustate)       // Opcode 0x09
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1334,7 +1334,7 @@ static void I386OP(or_rm32_r32)(i386_state *cpustate)       // Opcode 0x09
        }
 }
 
-static void I386OP(or_r32_rm32)(i386_state *cpustate)       // Opcode 0x0b
+static void __FASTCALL I386OP(or_r32_rm32)(i386_state *cpustate)       // Opcode 0x0b
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1354,7 +1354,7 @@ static void I386OP(or_r32_rm32)(i386_state *cpustate)       // Opcode 0x0b
        }
 }
 
-static void I386OP(or_eax_i32)(i386_state *cpustate)        // Opcode 0x0d
+static void __FASTCALL I386OP(or_eax_i32)(i386_state *cpustate)        // Opcode 0x0d
 {
        UINT32 src, dst;
        src = FETCH32(cpustate);
@@ -1364,7 +1364,7 @@ static void I386OP(or_eax_i32)(i386_state *cpustate)        // Opcode 0x0d
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(out_eax_i8)(i386_state *cpustate)        // Opcode 0xe7
+static void __FASTCALL I386OP(out_eax_i8)(i386_state *cpustate)        // Opcode 0xe7
 {
        UINT16 port = FETCH(cpustate);
        UINT32 data = REG32(EAX);
@@ -1372,7 +1372,7 @@ static void I386OP(out_eax_i8)(i386_state *cpustate)        // Opcode 0xe7
        CYCLES(cpustate,CYCLES_OUT_VAR);
 }
 
-static void I386OP(out_eax_dx)(i386_state *cpustate)        // Opcode 0xef
+static void __FASTCALL I386OP(out_eax_dx)(i386_state *cpustate)        // Opcode 0xef
 {
        UINT16 port = REG16(DX);
        UINT32 data = REG32(EAX);
@@ -1380,7 +1380,7 @@ static void I386OP(out_eax_dx)(i386_state *cpustate)        // Opcode 0xef
        CYCLES(cpustate,CYCLES_OUT);
 }
 
-static void I386OP(pop_eax)(i386_state *cpustate)           // Opcode 0x58
+static void __FASTCALL I386OP(pop_eax)(i386_state *cpustate)           // Opcode 0x58
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,4) == 0)
@@ -1390,7 +1390,7 @@ static void I386OP(pop_eax)(i386_state *cpustate)           // Opcode 0x58
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_ecx)(i386_state *cpustate)           // Opcode 0x59
+static void __FASTCALL I386OP(pop_ecx)(i386_state *cpustate)           // Opcode 0x59
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,4) == 0)
@@ -1400,7 +1400,7 @@ static void I386OP(pop_ecx)(i386_state *cpustate)           // Opcode 0x59
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_edx)(i386_state *cpustate)           // Opcode 0x5a
+static void __FASTCALL I386OP(pop_edx)(i386_state *cpustate)           // Opcode 0x5a
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,4) == 0)
@@ -1410,7 +1410,7 @@ static void I386OP(pop_edx)(i386_state *cpustate)           // Opcode 0x5a
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_ebx)(i386_state *cpustate)           // Opcode 0x5b
+static void __FASTCALL I386OP(pop_ebx)(i386_state *cpustate)           // Opcode 0x5b
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,4) == 0)
@@ -1420,7 +1420,7 @@ static void I386OP(pop_ebx)(i386_state *cpustate)           // Opcode 0x5b
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_esp)(i386_state *cpustate)           // Opcode 0x5c
+static void __FASTCALL I386OP(pop_esp)(i386_state *cpustate)           // Opcode 0x5c
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,4) == 0)
@@ -1430,7 +1430,7 @@ static void I386OP(pop_esp)(i386_state *cpustate)           // Opcode 0x5c
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_ebp)(i386_state *cpustate)           // Opcode 0x5d
+static void __FASTCALL I386OP(pop_ebp)(i386_state *cpustate)           // Opcode 0x5d
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,4) == 0)
@@ -1440,7 +1440,7 @@ static void I386OP(pop_ebp)(i386_state *cpustate)           // Opcode 0x5d
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_esi)(i386_state *cpustate)           // Opcode 0x5e
+static void __FASTCALL I386OP(pop_esi)(i386_state *cpustate)           // Opcode 0x5e
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,4) == 0)
@@ -1450,7 +1450,7 @@ static void I386OP(pop_esi)(i386_state *cpustate)           // Opcode 0x5e
        CYCLES(cpustate,CYCLES_POP_REG_SHORT);
 }
 
-static void I386OP(pop_edi)(i386_state *cpustate)           // Opcode 0x5f
+static void __FASTCALL I386OP(pop_edi)(i386_state *cpustate)           // Opcode 0x5f
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,4) == 0)
@@ -1487,27 +1487,27 @@ static bool I386OP(pop_seg32)(i386_state *cpustate, int segment)
        return true;
 }
 
-static void I386OP(pop_ds32)(i386_state *cpustate)          // Opcode 0x1f
+static void __FASTCALL I386OP(pop_ds32)(i386_state *cpustate)          // Opcode 0x1f
 {
        I386OP(pop_seg32)(cpustate, DS);
 }
 
-static void I386OP(pop_es32)(i386_state *cpustate)          // Opcode 0x07
+static void __FASTCALL I386OP(pop_es32)(i386_state *cpustate)          // Opcode 0x07
 {
        I386OP(pop_seg32)(cpustate, ES);
 }
 
-static void I386OP(pop_fs32)(i386_state *cpustate)          // Opcode 0x0f a1
+static void __FASTCALL I386OP(pop_fs32)(i386_state *cpustate)          // Opcode 0x0f a1
 {
        I386OP(pop_seg32)(cpustate, FS);
 }
 
-static void I386OP(pop_gs32)(i386_state *cpustate)          // Opcode 0x0f a9
+static void __FASTCALL I386OP(pop_gs32)(i386_state *cpustate)          // Opcode 0x0f a9
 {
        I386OP(pop_seg32)(cpustate, GS);
 }
 
-static void I386OP(pop_ss32)(i386_state *cpustate)          // Opcode 0x17
+static void __FASTCALL I386OP(pop_ss32)(i386_state *cpustate)          // Opcode 0x17
 {
        if(!I386OP(pop_seg32)(cpustate, SS)) return;
        if(cpustate->IF != 0) // if external interrupts are enabled
@@ -1517,7 +1517,7 @@ static void I386OP(pop_ss32)(i386_state *cpustate)          // Opcode 0x17
        }
 }
 
-static void I386OP(pop_rm32)(i386_state *cpustate)          // Opcode 0x8f
+static void __FASTCALL I386OP(pop_rm32)(i386_state *cpustate)          // Opcode 0x8f
 {
        UINT8 modrm = FETCH(cpustate);
        UINT32 value;
@@ -1551,7 +1551,7 @@ static void I386OP(pop_rm32)(i386_state *cpustate)          // Opcode 0x8f
        CYCLES(cpustate,CYCLES_POP_RM);
 }
 
-static void I386OP(popad)(i386_state *cpustate)             // Opcode 0x61
+static void __FASTCALL I386OP(popad)(i386_state *cpustate)             // Opcode 0x61
 {
        UINT32 offset = (STACK_32BIT ? REG32(ESP) : REG16(SP));
        if(i386_limit_check(cpustate,SS,offset,32) == 0)
@@ -1570,7 +1570,7 @@ static void I386OP(popad)(i386_state *cpustate)             // Opcode 0x61
        CYCLES(cpustate,CYCLES_POPA);
 }
 
-static void I386OP(popfd)(i386_state *cpustate)             // Opcode 0x9d
+static void __FASTCALL I386OP(popfd)(i386_state *cpustate)             // Opcode 0x9d
 {
        UINT32 value;
        UINT32 current = get_flags(cpustate);
@@ -1634,7 +1634,7 @@ static void I386OP(popfd)(i386_state *cpustate)             // Opcode 0x9d
        CYCLES(cpustate,CYCLES_POPF);
 }
 
-static void I386OP(push_eax)(i386_state *cpustate)          // Opcode 0x50
+static void __FASTCALL I386OP(push_eax)(i386_state *cpustate)          // Opcode 0x50
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1648,7 +1648,7 @@ static void I386OP(push_eax)(i386_state *cpustate)          // Opcode 0x50
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_ecx)(i386_state *cpustate)          // Opcode 0x51
+static void __FASTCALL I386OP(push_ecx)(i386_state *cpustate)          // Opcode 0x51
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1662,7 +1662,7 @@ static void I386OP(push_ecx)(i386_state *cpustate)          // Opcode 0x51
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_edx)(i386_state *cpustate)          // Opcode 0x52
+static void __FASTCALL I386OP(push_edx)(i386_state *cpustate)          // Opcode 0x52
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1676,7 +1676,7 @@ static void I386OP(push_edx)(i386_state *cpustate)          // Opcode 0x52
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_ebx)(i386_state *cpustate)          // Opcode 0x53
+static void __FASTCALL I386OP(push_ebx)(i386_state *cpustate)          // Opcode 0x53
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1690,7 +1690,7 @@ static void I386OP(push_ebx)(i386_state *cpustate)          // Opcode 0x53
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_esp)(i386_state *cpustate)          // Opcode 0x54
+static void __FASTCALL I386OP(push_esp)(i386_state *cpustate)          // Opcode 0x54
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1704,7 +1704,7 @@ static void I386OP(push_esp)(i386_state *cpustate)          // Opcode 0x54
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_ebp)(i386_state *cpustate)          // Opcode 0x55
+static void __FASTCALL I386OP(push_ebp)(i386_state *cpustate)          // Opcode 0x55
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1718,7 +1718,7 @@ static void I386OP(push_ebp)(i386_state *cpustate)          // Opcode 0x55
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_esi)(i386_state *cpustate)          // Opcode 0x56
+static void __FASTCALL I386OP(push_esi)(i386_state *cpustate)          // Opcode 0x56
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1732,7 +1732,7 @@ static void I386OP(push_esi)(i386_state *cpustate)          // Opcode 0x56
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_edi)(i386_state *cpustate)          // Opcode 0x57
+static void __FASTCALL I386OP(push_edi)(i386_state *cpustate)          // Opcode 0x57
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1746,7 +1746,7 @@ static void I386OP(push_edi)(i386_state *cpustate)          // Opcode 0x57
        CYCLES(cpustate,CYCLES_PUSH_REG_SHORT);
 }
 
-static void I386OP(push_cs32)(i386_state *cpustate)         // Opcode 0x0e
+static void __FASTCALL I386OP(push_cs32)(i386_state *cpustate)         // Opcode 0x0e
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1760,7 +1760,7 @@ static void I386OP(push_cs32)(i386_state *cpustate)         // Opcode 0x0e
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_ds32)(i386_state *cpustate)         // Opcode 0x1e
+static void __FASTCALL I386OP(push_ds32)(i386_state *cpustate)         // Opcode 0x1e
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1774,7 +1774,7 @@ static void I386OP(push_ds32)(i386_state *cpustate)         // Opcode 0x1e
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_es32)(i386_state *cpustate)         // Opcode 0x06
+static void __FASTCALL I386OP(push_es32)(i386_state *cpustate)         // Opcode 0x06
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1788,7 +1788,7 @@ static void I386OP(push_es32)(i386_state *cpustate)         // Opcode 0x06
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_fs32)(i386_state *cpustate)         // Opcode 0x0f a0
+static void __FASTCALL I386OP(push_fs32)(i386_state *cpustate)         // Opcode 0x0f a0
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1802,7 +1802,7 @@ static void I386OP(push_fs32)(i386_state *cpustate)         // Opcode 0x0f a0
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_gs32)(i386_state *cpustate)         // Opcode 0x0f a8
+static void __FASTCALL I386OP(push_gs32)(i386_state *cpustate)         // Opcode 0x0f a8
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1816,7 +1816,7 @@ static void I386OP(push_gs32)(i386_state *cpustate)         // Opcode 0x0f a8
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_ss32)(i386_state *cpustate)         // Opcode 0x16
+static void __FASTCALL I386OP(push_ss32)(i386_state *cpustate)         // Opcode 0x16
 {
        UINT32 offset;
        if(STACK_32BIT)
@@ -1830,7 +1830,7 @@ static void I386OP(push_ss32)(i386_state *cpustate)         // Opcode 0x16
        CYCLES(cpustate,CYCLES_PUSH_SREG);
 }
 
-static void I386OP(push_i32)(i386_state *cpustate)          // Opcode 0x68
+static void __FASTCALL I386OP(push_i32)(i386_state *cpustate)          // Opcode 0x68
 {
        UINT32 value = FETCH32(cpustate);
        UINT32 offset;
@@ -1845,7 +1845,7 @@ static void I386OP(push_i32)(i386_state *cpustate)          // Opcode 0x68
        CYCLES(cpustate,CYCLES_PUSH_IMM);
 }
 
-static void I386OP(pushad)(i386_state *cpustate)            // Opcode 0x60
+static void __FASTCALL I386OP(pushad)(i386_state *cpustate)            // Opcode 0x60
 {
        UINT32 temp = REG32(ESP);
        UINT32 offset;
@@ -1869,7 +1869,7 @@ static void I386OP(pushad)(i386_state *cpustate)            // Opcode 0x60
        CYCLES(cpustate,CYCLES_PUSHA);
 }
 
-static void I386OP(pushfd)(i386_state *cpustate)            // Opcode 0x9c
+static void __FASTCALL I386OP(pushfd)(i386_state *cpustate)            // Opcode 0x9c
 {
        if(!cpustate->IOP1 && !cpustate->IOP2 && V8086_MODE)
                FAULT(FAULT_GP,0)
@@ -1889,7 +1889,7 @@ static void I386OP(pushfd)(i386_state *cpustate)            // Opcode 0x9c
        CYCLES(cpustate,CYCLES_PUSHF);
 }
 
-static void I386OP(ret_near32_i16)(i386_state *cpustate)    // Opcode 0xc2
+static void __FASTCALL I386OP(ret_near32_i16)(i386_state *cpustate)    // Opcode 0xc2
 {
        INT16 disp = FETCH16(cpustate);
        cpustate->eip = POP32(cpustate);
@@ -1898,14 +1898,14 @@ static void I386OP(ret_near32_i16)(i386_state *cpustate)    // Opcode 0xc2
        CYCLES(cpustate,CYCLES_RET_IMM);        /* TODO: Timing = 10 + m */
 }
 
-static void I386OP(ret_near32)(i386_state *cpustate)        // Opcode 0xc3
+static void __FASTCALL I386OP(ret_near32)(i386_state *cpustate)        // Opcode 0xc3
 {
        cpustate->eip = POP32(cpustate);
        CHANGE_PC(cpustate,cpustate->eip);
        CYCLES(cpustate,CYCLES_RET);        /* TODO: Timing = 10 + m */
 }
 
-static void I386OP(sbb_rm32_r32)(i386_state *cpustate)      // Opcode 0x19
+static void __FASTCALL I386OP(sbb_rm32_r32)(i386_state *cpustate)      // Opcode 0x19
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1925,7 +1925,7 @@ static void I386OP(sbb_rm32_r32)(i386_state *cpustate)      // Opcode 0x19
        }
 }
 
-static void I386OP(sbb_r32_rm32)(i386_state *cpustate)      // Opcode 0x1b
+static void __FASTCALL I386OP(sbb_r32_rm32)(i386_state *cpustate)      // Opcode 0x1b
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1945,7 +1945,7 @@ static void I386OP(sbb_r32_rm32)(i386_state *cpustate)      // Opcode 0x1b
        }
 }
 
-static void I386OP(sbb_eax_i32)(i386_state *cpustate)       // Opcode 0x1d
+static void __FASTCALL I386OP(sbb_eax_i32)(i386_state *cpustate)       // Opcode 0x1d
 {
        UINT32 src, dst;
        src = FETCH32(cpustate);
@@ -1955,7 +1955,7 @@ static void I386OP(sbb_eax_i32)(i386_state *cpustate)       // Opcode 0x1d
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(scasd)(i386_state *cpustate)             // Opcode 0xaf
+static void __FASTCALL I386OP(scasd)(i386_state *cpustate)             // Opcode 0xaf
 {
        UINT32 eas, src, dst;
        eas = i386_translate(cpustate, ES, cpustate->address_size ? REG32(EDI) : REG16(DI), 0, 4 );
@@ -1966,7 +1966,7 @@ static void I386OP(scasd)(i386_state *cpustate)             // Opcode 0xaf
        CYCLES(cpustate,CYCLES_SCAS);
 }
 
-static void I386OP(shld32_i8)(i386_state *cpustate)         // Opcode 0x0f a4
+static void __FASTCALL I386OP(shld32_i8)(i386_state *cpustate)         // Opcode 0x0f a4
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2001,7 +2001,7 @@ static void I386OP(shld32_i8)(i386_state *cpustate)         // Opcode 0x0f a4
        }
 }
 
-static void I386OP(shld32_cl)(i386_state *cpustate)         // Opcode 0x0f a5
+static void __FASTCALL I386OP(shld32_cl)(i386_state *cpustate)         // Opcode 0x0f a5
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2036,7 +2036,7 @@ static void I386OP(shld32_cl)(i386_state *cpustate)         // Opcode 0x0f a5
        }
 }
 
-static void I386OP(shrd32_i8)(i386_state *cpustate)         // Opcode 0x0f ac
+static void __FASTCALL I386OP(shrd32_i8)(i386_state *cpustate)         // Opcode 0x0f ac
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2071,7 +2071,7 @@ static void I386OP(shrd32_i8)(i386_state *cpustate)         // Opcode 0x0f ac
        }
 }
 
-static void I386OP(shrd32_cl)(i386_state *cpustate)         // Opcode 0x0f ad
+static void __FASTCALL I386OP(shrd32_cl)(i386_state *cpustate)         // Opcode 0x0f ad
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2106,7 +2106,7 @@ static void I386OP(shrd32_cl)(i386_state *cpustate)         // Opcode 0x0f ad
        }
 }
 
-static void I386OP(stosd)(i386_state *cpustate)             // Opcode 0xab
+static void __FASTCALL I386OP(stosd)(i386_state *cpustate)             // Opcode 0xab
 {
        UINT32 eas = i386_translate(cpustate, ES, cpustate->address_size ? REG32(EDI) : REG16(DI), 1, 4 );
        WRITE32(cpustate,eas, REG32(EAX));
@@ -2114,7 +2114,7 @@ static void I386OP(stosd)(i386_state *cpustate)             // Opcode 0xab
        CYCLES(cpustate,CYCLES_STOS);
 }
 
-static void I386OP(sub_rm32_r32)(i386_state *cpustate)      // Opcode 0x29
+static void __FASTCALL I386OP(sub_rm32_r32)(i386_state *cpustate)      // Opcode 0x29
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2134,7 +2134,7 @@ static void I386OP(sub_rm32_r32)(i386_state *cpustate)      // Opcode 0x29
        }
 }
 
-static void I386OP(sub_r32_rm32)(i386_state *cpustate)      // Opcode 0x2b
+static void __FASTCALL I386OP(sub_r32_rm32)(i386_state *cpustate)      // Opcode 0x2b
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2154,7 +2154,7 @@ static void I386OP(sub_r32_rm32)(i386_state *cpustate)      // Opcode 0x2b
        }
 }
 
-static void I386OP(sub_eax_i32)(i386_state *cpustate)       // Opcode 0x2d
+static void __FASTCALL I386OP(sub_eax_i32)(i386_state *cpustate)       // Opcode 0x2d
 {
        UINT32 src, dst;
        src = FETCH32(cpustate);
@@ -2164,7 +2164,7 @@ static void I386OP(sub_eax_i32)(i386_state *cpustate)       // Opcode 0x2d
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(test_eax_i32)(i386_state *cpustate)      // Opcode 0xa9
+static void __FASTCALL I386OP(test_eax_i32)(i386_state *cpustate)      // Opcode 0xa9
 {
        UINT32 src = FETCH32(cpustate);
        UINT32 dst = REG32(EAX);
@@ -2175,7 +2175,7 @@ static void I386OP(test_eax_i32)(i386_state *cpustate)      // Opcode 0xa9
        CYCLES(cpustate,CYCLES_TEST_IMM_ACC);
 }
 
-static void I386OP(test_rm32_r32)(i386_state *cpustate)     // Opcode 0x85
+static void __FASTCALL I386OP(test_rm32_r32)(i386_state *cpustate)     // Opcode 0x85
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2199,7 +2199,7 @@ static void I386OP(test_rm32_r32)(i386_state *cpustate)     // Opcode 0x85
        }
 }
 
-static void I386OP(xchg_eax_ecx)(i386_state *cpustate)      // Opcode 0x91
+static void __FASTCALL I386OP(xchg_eax_ecx)(i386_state *cpustate)      // Opcode 0x91
 {
        UINT32 temp;
        temp = REG32(EAX);
@@ -2208,7 +2208,7 @@ static void I386OP(xchg_eax_ecx)(i386_state *cpustate)      // Opcode 0x91
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_eax_edx)(i386_state *cpustate)      // Opcode 0x92
+static void __FASTCALL I386OP(xchg_eax_edx)(i386_state *cpustate)      // Opcode 0x92
 {
        UINT32 temp;
        temp = REG32(EAX);
@@ -2217,7 +2217,7 @@ static void I386OP(xchg_eax_edx)(i386_state *cpustate)      // Opcode 0x92
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_eax_ebx)(i386_state *cpustate)      // Opcode 0x93
+static void __FASTCALL I386OP(xchg_eax_ebx)(i386_state *cpustate)      // Opcode 0x93
 {
        UINT32 temp;
        temp = REG32(EAX);
@@ -2226,7 +2226,7 @@ static void I386OP(xchg_eax_ebx)(i386_state *cpustate)      // Opcode 0x93
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_eax_esp)(i386_state *cpustate)      // Opcode 0x94
+static void __FASTCALL I386OP(xchg_eax_esp)(i386_state *cpustate)      // Opcode 0x94
 {
        UINT32 temp;
        temp = REG32(EAX);
@@ -2235,7 +2235,7 @@ static void I386OP(xchg_eax_esp)(i386_state *cpustate)      // Opcode 0x94
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_eax_ebp)(i386_state *cpustate)      // Opcode 0x95
+static void __FASTCALL I386OP(xchg_eax_ebp)(i386_state *cpustate)      // Opcode 0x95
 {
        UINT32 temp;
        temp = REG32(EAX);
@@ -2244,7 +2244,7 @@ static void I386OP(xchg_eax_ebp)(i386_state *cpustate)      // Opcode 0x95
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_eax_esi)(i386_state *cpustate)      // Opcode 0x96
+static void __FASTCALL I386OP(xchg_eax_esi)(i386_state *cpustate)      // Opcode 0x96
 {
        UINT32 temp;
        temp = REG32(EAX);
@@ -2253,7 +2253,7 @@ static void I386OP(xchg_eax_esi)(i386_state *cpustate)      // Opcode 0x96
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_eax_edi)(i386_state *cpustate)      // Opcode 0x97
+static void __FASTCALL I386OP(xchg_eax_edi)(i386_state *cpustate)      // Opcode 0x97
 {
        UINT32 temp;
        temp = REG32(EAX);
@@ -2262,7 +2262,7 @@ static void I386OP(xchg_eax_edi)(i386_state *cpustate)      // Opcode 0x97
        CYCLES(cpustate,CYCLES_XCHG_REG_REG);
 }
 
-static void I386OP(xchg_r32_rm32)(i386_state *cpustate)     // Opcode 0x87
+static void __FASTCALL I386OP(xchg_r32_rm32)(i386_state *cpustate)     // Opcode 0x87
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2281,7 +2281,7 @@ static void I386OP(xchg_r32_rm32)(i386_state *cpustate)     // Opcode 0x87
        }
 }
 
-static void I386OP(xor_rm32_r32)(i386_state *cpustate)      // Opcode 0x31
+static void __FASTCALL I386OP(xor_rm32_r32)(i386_state *cpustate)      // Opcode 0x31
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2301,7 +2301,7 @@ static void I386OP(xor_rm32_r32)(i386_state *cpustate)      // Opcode 0x31
        }
 }
 
-static void I386OP(xor_r32_rm32)(i386_state *cpustate)      // Opcode 0x33
+static void __FASTCALL I386OP(xor_r32_rm32)(i386_state *cpustate)      // Opcode 0x33
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2321,7 +2321,7 @@ static void I386OP(xor_r32_rm32)(i386_state *cpustate)      // Opcode 0x33
        }
 }
 
-static void I386OP(xor_eax_i32)(i386_state *cpustate)       // Opcode 0x35
+static void __FASTCALL I386OP(xor_eax_i32)(i386_state *cpustate)       // Opcode 0x35
 {
        UINT32 src, dst;
        src = FETCH32(cpustate);
@@ -2333,7 +2333,7 @@ static void I386OP(xor_eax_i32)(i386_state *cpustate)       // Opcode 0x35
 
 
 
-static void I386OP(group81_32)(i386_state *cpustate)        // Opcode 0x81
+static void __FASTCALL I386OP(group81_32)(i386_state *cpustate)        // Opcode 0x81
 {
        UINT32 ea;
        UINT32 src, dst;
@@ -2470,7 +2470,7 @@ static void I386OP(group81_32)(i386_state *cpustate)        // Opcode 0x81
        }
 }
 
-static void I386OP(group83_32)(i386_state *cpustate)        // Opcode 0x83
+static void __FASTCALL I386OP(group83_32)(i386_state *cpustate)        // Opcode 0x83
 {
        UINT32 ea;
        UINT32 src, dst;
@@ -2607,7 +2607,7 @@ static void I386OP(group83_32)(i386_state *cpustate)        // Opcode 0x83
        }
 }
 
-static void I386OP(groupC1_32)(i386_state *cpustate)        // Opcode 0xc1
+static void __FASTCALL I386OP(groupC1_32)(i386_state *cpustate)        // Opcode 0xc1
 {
        UINT32 dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2627,7 +2627,7 @@ static void I386OP(groupC1_32)(i386_state *cpustate)        // Opcode 0xc1
        }
 }
 
-static void I386OP(groupD1_32)(i386_state *cpustate)        // Opcode 0xd1
+static void __FASTCALL I386OP(groupD1_32)(i386_state *cpustate)        // Opcode 0xd1
 {
        UINT32 dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2644,7 +2644,7 @@ static void I386OP(groupD1_32)(i386_state *cpustate)        // Opcode 0xd1
        }
 }
 
-static void I386OP(groupD3_32)(i386_state *cpustate)        // Opcode 0xd3
+static void __FASTCALL I386OP(groupD3_32)(i386_state *cpustate)        // Opcode 0xd3
 {
        UINT32 dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2661,7 +2661,7 @@ static void I386OP(groupD3_32)(i386_state *cpustate)        // Opcode 0xd3
        }
 }
 
-static void I386OP(groupF7_32)(i386_state *cpustate)        // Opcode 0xf7
+static void __FASTCALL I386OP(groupF7_32)(i386_state *cpustate)        // Opcode 0xf7
 {
        UINT8 modrm = FETCH(cpustate);
 
@@ -2817,7 +2817,7 @@ static void I386OP(groupF7_32)(i386_state *cpustate)        // Opcode 0xf7
        }
 }
 
-static void I386OP(groupFF_32)(i386_state *cpustate)        // Opcode 0xff
+static void __FASTCALL I386OP(groupFF_32)(i386_state *cpustate)        // Opcode 0xff
 {
        UINT8 modrm = FETCH(cpustate);
 
@@ -2963,7 +2963,7 @@ static void I386OP(groupFF_32)(i386_state *cpustate)        // Opcode 0xff
        }
 }
 
-static void I386OP(group0F00_32)(i386_state *cpustate)          // Opcode 0x0f 00
+static void __FASTCALL I386OP(group0F00_32)(i386_state *cpustate)          // Opcode 0x0f 00
 {
        UINT32 address, ea;
        UINT8 modrm = FETCH(cpustate);
@@ -3166,7 +3166,7 @@ static void I386OP(group0F00_32)(i386_state *cpustate)          // Opcode 0x0f 0
        }
 }
 
-static void I386OP(group0F01_32)(i386_state *cpustate)      // Opcode 0x0f 01
+static void __FASTCALL I386OP(group0F01_32)(i386_state *cpustate)      // Opcode 0x0f 01
 {
        UINT8 modrm = FETCH(cpustate);
        UINT32 address, ea;
@@ -3275,7 +3275,7 @@ static void I386OP(group0F01_32)(i386_state *cpustate)      // Opcode 0x0f 01
        }
 }
 
-static void I386OP(group0FBA_32)(i386_state *cpustate)      // Opcode 0x0f ba
+static void __FASTCALL I386OP(group0FBA_32)(i386_state *cpustate)      // Opcode 0x0f ba
 {
        UINT8 modrm = FETCH(cpustate);
 
@@ -3395,7 +3395,7 @@ static void I386OP(group0FBA_32)(i386_state *cpustate)      // Opcode 0x0f ba
        }
 }
 
-static void I386OP(lar_r32_rm32)(i386_state *cpustate)  // Opcode 0x0f 0x02
+static void __FASTCALL I386OP(lar_r32_rm32)(i386_state *cpustate)  // Opcode 0x0f 0x02
 {
        UINT8 modrm = FETCH(cpustate);
        I386_SREG seg;
@@ -3462,7 +3462,7 @@ static void I386OP(lar_r32_rm32)(i386_state *cpustate)  // Opcode 0x0f 0x02
        }
 }
 
-static void I386OP(lsl_r32_rm32)(i386_state *cpustate)  // Opcode 0x0f 0x03
+static void __FASTCALL I386OP(lsl_r32_rm32)(i386_state *cpustate)  // Opcode 0x0f 0x03
 {
        UINT8 modrm = FETCH(cpustate);
        UINT32 limit;
@@ -3527,7 +3527,7 @@ static void I386OP(lsl_r32_rm32)(i386_state *cpustate)  // Opcode 0x0f 0x03
        }
 }
 
-static void I386OP(bound_r32_m32_m32)(i386_state *cpustate) // Opcode 0x62
+static void __FASTCALL I386OP(bound_r32_m32_m32)(i386_state *cpustate) // Opcode 0x62
 {
        UINT8 modrm;
        INT32 val, low, high;
@@ -3557,7 +3557,7 @@ static void I386OP(bound_r32_m32_m32)(i386_state *cpustate) // Opcode 0x62
        }
 }
 
-static void I386OP(retf32)(i386_state *cpustate)            // Opcode 0xcb
+static void __FASTCALL I386OP(retf32)(i386_state *cpustate)            // Opcode 0xcb
 {
        if(PROTECTED_MODE && !V8086_MODE)
        {
@@ -3574,7 +3574,7 @@ static void I386OP(retf32)(i386_state *cpustate)            // Opcode 0xcb
        CYCLES(cpustate,CYCLES_RET_INTERSEG);
 }
 
-static void I386OP(retf_i32)(i386_state *cpustate)          // Opcode 0xca
+static void __FASTCALL I386OP(retf_i32)(i386_state *cpustate)          // Opcode 0xca
 {
        UINT16 count = FETCH16(cpustate);
 
@@ -3594,7 +3594,7 @@ static void I386OP(retf_i32)(i386_state *cpustate)          // Opcode 0xca
        CYCLES(cpustate,CYCLES_RET_IMM_INTERSEG);
 }
 
-static void I386OP(load_far_pointer32)(i386_state *cpustate, int s)
+static void __FASTCALL I386OP(load_far_pointer32)(i386_state *cpustate, int s)
 {
        UINT8 modrm = FETCH(cpustate);
        UINT16 selector;
@@ -3609,31 +3609,31 @@ static void I386OP(load_far_pointer32)(i386_state *cpustate, int s)
        }
 }
 
-static void I386OP(lds32)(i386_state *cpustate)             // Opcode 0xc5
+static void __FASTCALL I386OP(lds32)(i386_state *cpustate)             // Opcode 0xc5
 {
        I386OP(load_far_pointer32)(cpustate, DS);
        CYCLES(cpustate,CYCLES_LDS);
 }
 
-static void I386OP(lss32)(i386_state *cpustate)             // Opcode 0x0f 0xb2
+static void __FASTCALL I386OP(lss32)(i386_state *cpustate)             // Opcode 0x0f 0xb2
 {
        I386OP(load_far_pointer32)(cpustate, SS);
        CYCLES(cpustate,CYCLES_LSS);
 }
 
-static void I386OP(les32)(i386_state *cpustate)             // Opcode 0xc4
+static void __FASTCALL I386OP(les32)(i386_state *cpustate)             // Opcode 0xc4
 {
        I386OP(load_far_pointer32)(cpustate, ES);
        CYCLES(cpustate,CYCLES_LES);
 }
 
-static void I386OP(lfs32)(i386_state *cpustate)             // Opcode 0x0f 0xb4
+static void __FASTCALL I386OP(lfs32)(i386_state *cpustate)             // Opcode 0x0f 0xb4
 {
        I386OP(load_far_pointer32)(cpustate, FS);
        CYCLES(cpustate,CYCLES_LFS);
 }
 
-static void I386OP(lgs32)(i386_state *cpustate)             // Opcode 0x0f 0xb5
+static void __FASTCALL I386OP(lgs32)(i386_state *cpustate)             // Opcode 0x0f 0xb5
 {
        I386OP(load_far_pointer32)(cpustate, GS);
        CYCLES(cpustate,CYCLES_LGS);
index 3c8be1e..825eaf7 100644 (file)
@@ -1,6 +1,6 @@
 // license:BSD-3-Clause
 // copyright-holders:Ville Linde, Barry Rodewald, Carl, Philip Bennett
-static UINT8 I386OP(shift_rotate8)(i386_state *cpustate, UINT8 modrm, UINT32 value, UINT8 shift)
+static UINT8 __FASTCALL I386OP(shift_rotate8)(i386_state *cpustate, UINT8 modrm, UINT32 value, UINT8 shift)
 {
        UINT32 src = value & 0xff;
        UINT8 dst = value;
@@ -143,7 +143,7 @@ static UINT8 I386OP(shift_rotate8)(i386_state *cpustate, UINT8 modrm, UINT32 val
 
 
 
-static void I386OP(adc_rm8_r8)(i386_state *cpustate)        // Opcode 0x10
+static void __FASTCALL I386OP(adc_rm8_r8)(i386_state *cpustate)        // Opcode 0x10
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -163,7 +163,7 @@ static void I386OP(adc_rm8_r8)(i386_state *cpustate)        // Opcode 0x10
        }
 }
 
-static void I386OP(adc_r8_rm8)(i386_state *cpustate)        // Opcode 0x12
+static void __FASTCALL I386OP(adc_r8_rm8)(i386_state *cpustate)        // Opcode 0x12
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -183,7 +183,7 @@ static void I386OP(adc_r8_rm8)(i386_state *cpustate)        // Opcode 0x12
        }
 }
 
-static void I386OP(adc_al_i8)(i386_state *cpustate)     // Opcode 0x14
+static void __FASTCALL I386OP(adc_al_i8)(i386_state *cpustate)     // Opcode 0x14
 {
        UINT8 src, dst;
        src = FETCH(cpustate);
@@ -193,7 +193,7 @@ static void I386OP(adc_al_i8)(i386_state *cpustate)     // Opcode 0x14
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(add_rm8_r8)(i386_state *cpustate)        // Opcode 0x00
+static void __FASTCALL I386OP(add_rm8_r8)(i386_state *cpustate)        // Opcode 0x00
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -213,7 +213,7 @@ static void I386OP(add_rm8_r8)(i386_state *cpustate)        // Opcode 0x00
        }
 }
 
-static void I386OP(add_r8_rm8)(i386_state *cpustate)        // Opcode 0x02
+static void __FASTCALL I386OP(add_r8_rm8)(i386_state *cpustate)        // Opcode 0x02
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -233,7 +233,7 @@ static void I386OP(add_r8_rm8)(i386_state *cpustate)        // Opcode 0x02
        }
 }
 
-static void I386OP(add_al_i8)(i386_state *cpustate)     // Opcode 0x04
+static void __FASTCALL I386OP(add_al_i8)(i386_state *cpustate)     // Opcode 0x04
 {
        UINT8 src, dst;
        src = FETCH(cpustate);
@@ -243,7 +243,7 @@ static void I386OP(add_al_i8)(i386_state *cpustate)     // Opcode 0x04
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(and_rm8_r8)(i386_state *cpustate)        // Opcode 0x20
+static void __FASTCALL I386OP(and_rm8_r8)(i386_state *cpustate)        // Opcode 0x20
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -263,7 +263,7 @@ static void I386OP(and_rm8_r8)(i386_state *cpustate)        // Opcode 0x20
        }
 }
 
-static void I386OP(and_r8_rm8)(i386_state *cpustate)        // Opcode 0x22
+static void __FASTCALL I386OP(and_r8_rm8)(i386_state *cpustate)        // Opcode 0x22
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -283,7 +283,7 @@ static void I386OP(and_r8_rm8)(i386_state *cpustate)        // Opcode 0x22
        }
 }
 
-static void I386OP(and_al_i8)(i386_state *cpustate)         // Opcode 0x24
+static void __FASTCALL I386OP(and_al_i8)(i386_state *cpustate)         // Opcode 0x24
 {
        UINT8 src, dst;
        src = FETCH(cpustate);
@@ -293,19 +293,19 @@ static void I386OP(and_al_i8)(i386_state *cpustate)         // Opcode 0x24
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(clc)(i386_state *cpustate)               // Opcode 0xf8
+static void __FASTCALL I386OP(clc)(i386_state *cpustate)               // Opcode 0xf8
 {
        cpustate->CF = 0;
        CYCLES(cpustate,CYCLES_CLC);
 }
 
-static void I386OP(cld)(i386_state *cpustate)               // Opcode 0xfc
+static void __FASTCALL I386OP(cld)(i386_state *cpustate)               // Opcode 0xfc
 {
        cpustate->DF = 0;
        CYCLES(cpustate,CYCLES_CLD);
 }
 
-static void I386OP(cli)(i386_state *cpustate)               // Opcode 0xfa
+static void __FASTCALL I386OP(cli)(i386_state *cpustate)               // Opcode 0xfa
 {
        if(PROTECTED_MODE)
        {
@@ -320,13 +320,13 @@ static void I386OP(cli)(i386_state *cpustate)               // Opcode 0xfa
        CYCLES(cpustate,CYCLES_CLI);
 }
 
-static void I386OP(cmc)(i386_state *cpustate)               // Opcode 0xf5
+static void __FASTCALL I386OP(cmc)(i386_state *cpustate)               // Opcode 0xf5
 {
        cpustate->CF ^= 1;
        CYCLES(cpustate,CYCLES_CMC);
 }
 
-static void I386OP(cmp_rm8_r8)(i386_state *cpustate)        // Opcode 0x38
+static void __FASTCALL I386OP(cmp_rm8_r8)(i386_state *cpustate)        // Opcode 0x38
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -344,7 +344,7 @@ static void I386OP(cmp_rm8_r8)(i386_state *cpustate)        // Opcode 0x38
        }
 }
 
-static void I386OP(cmp_r8_rm8)(i386_state *cpustate)        // Opcode 0x3a
+static void __FASTCALL I386OP(cmp_r8_rm8)(i386_state *cpustate)        // Opcode 0x3a
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -362,7 +362,7 @@ static void I386OP(cmp_r8_rm8)(i386_state *cpustate)        // Opcode 0x3a
        }
 }
 
-static void I386OP(cmp_al_i8)(i386_state *cpustate)         // Opcode 0x3c
+static void __FASTCALL I386OP(cmp_al_i8)(i386_state *cpustate)         // Opcode 0x3c
 {
        UINT8 src, dst;
        src = FETCH(cpustate);
@@ -371,7 +371,7 @@ static void I386OP(cmp_al_i8)(i386_state *cpustate)         // Opcode 0x3c
        CYCLES(cpustate,CYCLES_CMP_IMM_ACC);
 }
 
-static void I386OP(cmpsb)(i386_state *cpustate)             // Opcode 0xa6
+static void __FASTCALL I386OP(cmpsb)(i386_state *cpustate)             // Opcode 0xa6
 {
        UINT32 eas, ead;
        UINT8 src, dst;
@@ -389,7 +389,7 @@ static void I386OP(cmpsb)(i386_state *cpustate)             // Opcode 0xa6
        CYCLES(cpustate,CYCLES_CMPS);
 }
 
-static void I386OP(in_al_i8)(i386_state *cpustate)          // Opcode 0xe4
+static void __FASTCALL I386OP(in_al_i8)(i386_state *cpustate)          // Opcode 0xe4
 {
        UINT16 port = FETCH(cpustate);
        UINT8 data = READPORT8(cpustate, port);
@@ -397,7 +397,7 @@ static void I386OP(in_al_i8)(i386_state *cpustate)          // Opcode 0xe4
        CYCLES(cpustate,CYCLES_IN_VAR);
 }
 
-static void I386OP(in_al_dx)(i386_state *cpustate)          // Opcode 0xec
+static void __FASTCALL I386OP(in_al_dx)(i386_state *cpustate)          // Opcode 0xec
 {
        UINT16 port = REG16(DX);
        UINT8 data = READPORT8(cpustate, port);
@@ -405,7 +405,7 @@ static void I386OP(in_al_dx)(i386_state *cpustate)          // Opcode 0xec
        CYCLES(cpustate,CYCLES_IN);
 }
 
-static void I386OP(ja_rel8)(i386_state *cpustate)           // Opcode 0x77
+static void __FASTCALL I386OP(ja_rel8)(i386_state *cpustate)           // Opcode 0x77
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->CF == 0 && cpustate->ZF == 0 ) {
@@ -416,7 +416,7 @@ static void I386OP(ja_rel8)(i386_state *cpustate)           // Opcode 0x77
        }
 }
 
-static void I386OP(jbe_rel8)(i386_state *cpustate)          // Opcode 0x76
+static void __FASTCALL I386OP(jbe_rel8)(i386_state *cpustate)          // Opcode 0x76
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->CF != 0 || cpustate->ZF != 0 ) {
@@ -427,7 +427,7 @@ static void I386OP(jbe_rel8)(i386_state *cpustate)          // Opcode 0x76
        }
 }
 
-static void I386OP(jc_rel8)(i386_state *cpustate)           // Opcode 0x72
+static void __FASTCALL I386OP(jc_rel8)(i386_state *cpustate)           // Opcode 0x72
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->CF != 0 ) {
@@ -438,7 +438,7 @@ static void I386OP(jc_rel8)(i386_state *cpustate)           // Opcode 0x72
        }
 }
 
-static void I386OP(jg_rel8)(i386_state *cpustate)           // Opcode 0x7f
+static void __FASTCALL I386OP(jg_rel8)(i386_state *cpustate)           // Opcode 0x7f
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->ZF == 0 && (cpustate->SF == cpustate->OF) ) {
@@ -449,7 +449,7 @@ static void I386OP(jg_rel8)(i386_state *cpustate)           // Opcode 0x7f
        }
 }
 
-static void I386OP(jge_rel8)(i386_state *cpustate)          // Opcode 0x7d
+static void __FASTCALL I386OP(jge_rel8)(i386_state *cpustate)          // Opcode 0x7d
 {
        INT8 disp = FETCH(cpustate);
        if(cpustate->SF == cpustate->OF) {
@@ -460,7 +460,7 @@ static void I386OP(jge_rel8)(i386_state *cpustate)          // Opcode 0x7d
        }
 }
 
-static void I386OP(jl_rel8)(i386_state *cpustate)           // Opcode 0x7c
+static void __FASTCALL I386OP(jl_rel8)(i386_state *cpustate)           // Opcode 0x7c
 {
        INT8 disp = FETCH(cpustate);
        if( (cpustate->SF != cpustate->OF) ) {
@@ -471,7 +471,7 @@ static void I386OP(jl_rel8)(i386_state *cpustate)           // Opcode 0x7c
        }
 }
 
-static void I386OP(jle_rel8)(i386_state *cpustate)      // Opcode 0x7e
+static void __FASTCALL I386OP(jle_rel8)(i386_state *cpustate)      // Opcode 0x7e
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->ZF != 0 || (cpustate->SF != cpustate->OF) ) {
@@ -482,7 +482,7 @@ static void I386OP(jle_rel8)(i386_state *cpustate)      // Opcode 0x7e
        }
 }
 
-static void I386OP(jnc_rel8)(i386_state *cpustate)          // Opcode 0x73
+static void __FASTCALL I386OP(jnc_rel8)(i386_state *cpustate)          // Opcode 0x73
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->CF == 0 ) {
@@ -493,7 +493,7 @@ static void I386OP(jnc_rel8)(i386_state *cpustate)          // Opcode 0x73
        }
 }
 
-static void I386OP(jno_rel8)(i386_state *cpustate)          // Opcode 0x71
+static void __FASTCALL I386OP(jno_rel8)(i386_state *cpustate)          // Opcode 0x71
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->OF == 0 ) {
@@ -504,7 +504,7 @@ static void I386OP(jno_rel8)(i386_state *cpustate)          // Opcode 0x71
        }
 }
 
-static void I386OP(jnp_rel8)(i386_state *cpustate)          // Opcode 0x7b
+static void __FASTCALL I386OP(jnp_rel8)(i386_state *cpustate)          // Opcode 0x7b
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->PF == 0 ) {
@@ -515,7 +515,7 @@ static void I386OP(jnp_rel8)(i386_state *cpustate)          // Opcode 0x7b
        }
 }
 
-static void I386OP(jns_rel8)(i386_state *cpustate)          // Opcode 0x79
+static void __FASTCALL I386OP(jns_rel8)(i386_state *cpustate)          // Opcode 0x79
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->SF == 0 ) {
@@ -526,7 +526,7 @@ static void I386OP(jns_rel8)(i386_state *cpustate)          // Opcode 0x79
        }
 }
 
-static void I386OP(jnz_rel8)(i386_state *cpustate)          // Opcode 0x75
+static void __FASTCALL I386OP(jnz_rel8)(i386_state *cpustate)          // Opcode 0x75
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->ZF == 0 ) {
@@ -537,7 +537,7 @@ static void I386OP(jnz_rel8)(i386_state *cpustate)          // Opcode 0x75
        }
 }
 
-static void I386OP(jo_rel8)(i386_state *cpustate)           // Opcode 0x70
+static void __FASTCALL I386OP(jo_rel8)(i386_state *cpustate)           // Opcode 0x70
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->OF != 0 ) {
@@ -548,7 +548,7 @@ static void I386OP(jo_rel8)(i386_state *cpustate)           // Opcode 0x70
        }
 }
 
-static void I386OP(jp_rel8)(i386_state *cpustate)           // Opcode 0x7a
+static void __FASTCALL I386OP(jp_rel8)(i386_state *cpustate)           // Opcode 0x7a
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->PF != 0 ) {
@@ -559,7 +559,7 @@ static void I386OP(jp_rel8)(i386_state *cpustate)           // Opcode 0x7a
        }
 }
 
-static void I386OP(js_rel8)(i386_state *cpustate)           // Opcode 0x78
+static void __FASTCALL I386OP(js_rel8)(i386_state *cpustate)           // Opcode 0x78
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->SF != 0 ) {
@@ -570,7 +570,7 @@ static void I386OP(js_rel8)(i386_state *cpustate)           // Opcode 0x78
        }
 }
 
-static void I386OP(jz_rel8)(i386_state *cpustate)           // Opcode 0x74
+static void __FASTCALL I386OP(jz_rel8)(i386_state *cpustate)           // Opcode 0x74
 {
        INT8 disp = FETCH(cpustate);
        if( cpustate->ZF != 0 ) {
@@ -581,20 +581,20 @@ static void I386OP(jz_rel8)(i386_state *cpustate)           // Opcode 0x74
        }
 }
 
-static void I386OP(jmp_rel8)(i386_state *cpustate)          // Opcode 0xeb
+static void __FASTCALL I386OP(jmp_rel8)(i386_state *cpustate)          // Opcode 0xeb
 {
        INT8 disp = FETCH(cpustate);
        NEAR_BRANCH(cpustate,disp);
        CYCLES(cpustate,CYCLES_JMP_SHORT);      /* TODO: Timing = 7 + m */
 }
 
-static void I386OP(lahf)(i386_state *cpustate)              // Opcode 0x9f
+static void __FASTCALL I386OP(lahf)(i386_state *cpustate)              // Opcode 0x9f
 {
        REG8(AH) = get_flags(cpustate) & 0xd7;
        CYCLES(cpustate,CYCLES_LAHF);
 }
 
-static void I386OP(lodsb)(i386_state *cpustate)             // Opcode 0xac
+static void __FASTCALL I386OP(lodsb)(i386_state *cpustate)             // Opcode 0xac
 {
        UINT32 eas;
        if( cpustate->segment_prefix ) {
@@ -607,7 +607,7 @@ static void I386OP(lodsb)(i386_state *cpustate)             // Opcode 0xac
        CYCLES(cpustate,CYCLES_LODS);
 }
 
-static void I386OP(mov_rm8_r8)(i386_state *cpustate)        // Opcode 0x88
+static void __FASTCALL I386OP(mov_rm8_r8)(i386_state *cpustate)        // Opcode 0x88
 {
        UINT8 src;
        UINT8 modrm = FETCH(cpustate);
@@ -623,7 +623,7 @@ static void I386OP(mov_rm8_r8)(i386_state *cpustate)        // Opcode 0x88
        }
 }
 
-static void I386OP(mov_r8_rm8)(i386_state *cpustate)        // Opcode 0x8a
+static void __FASTCALL I386OP(mov_r8_rm8)(i386_state *cpustate)        // Opcode 0x8a
 {
        UINT8 src;
        UINT8 modrm = FETCH(cpustate);
@@ -639,7 +639,7 @@ static void I386OP(mov_r8_rm8)(i386_state *cpustate)        // Opcode 0x8a
        }
 }
 
-static void I386OP(mov_rm8_i8)(i386_state *cpustate)        // Opcode 0xc6
+static void __FASTCALL I386OP(mov_rm8_i8)(i386_state *cpustate)        // Opcode 0xc6
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -655,7 +655,7 @@ static void I386OP(mov_rm8_i8)(i386_state *cpustate)        // Opcode 0xc6
 }
 //#include "./i386ctrlregdefs.h"
 
-static void I386OP(mov_r32_cr)(i386_state *cpustate)        // Opcode 0x0f 20
+static void __FASTCALL I386OP(mov_r32_cr)(i386_state *cpustate)        // Opcode 0x0f 20
 {
 #if 1
        UINT32 oldpc = cpustate->prev_pc;
@@ -724,7 +724,7 @@ void i386_change_paging_mode(i386_state *cpustate, int val)
        // ToDo: Implement paging bit.
 }
 
-static void I386OP(mov_r32_dr)(i386_state *cpustate)        // Opcode 0x0f 21
+static void __FASTCALL I386OP(mov_r32_dr)(i386_state *cpustate)        // Opcode 0x0f 21
 {
        UINT8 modrm = FETCH(cpustate);
        if(modrm < 0xc0) {
@@ -770,7 +770,7 @@ static void I386OP(mov_r32_dr)(i386_state *cpustate)        // Opcode 0x0f 21
        }
 }
 
-static void I386OP(mov_cr_r32)(i386_state *cpustate)        // Opcode 0x0f 22
+static void __FASTCALL I386OP(mov_cr_r32)(i386_state *cpustate)        // Opcode 0x0f 22
 {
 #if 1
        UINT32 oldpc = cpustate->prev_pc;
@@ -912,7 +912,7 @@ static void I386OP(mov_cr_r32)(i386_state *cpustate)        // Opcode 0x0f 22
 #endif
 }
 
-static void I386OP(mov_dr_r32)(i386_state *cpustate)        // Opcode 0x0f 23
+static void __FASTCALL I386OP(mov_dr_r32)(i386_state *cpustate)        // Opcode 0x0f 23
 {
        UINT8 modrm = FETCH(cpustate);
        if(modrm < 0xc0) {
@@ -958,7 +958,7 @@ static void I386OP(mov_dr_r32)(i386_state *cpustate)        // Opcode 0x0f 23
        }
 }
 
-static void I386OP(mov_al_m8)(i386_state *cpustate)         // Opcode 0xa0
+static void __FASTCALL I386OP(mov_al_m8)(i386_state *cpustate)         // Opcode 0xa0
 {
        UINT32 offset, ea;
        if( cpustate->address_size ) {
@@ -976,7 +976,7 @@ static void I386OP(mov_al_m8)(i386_state *cpustate)         // Opcode 0xa0
        CYCLES(cpustate,CYCLES_MOV_IMM_MEM);
 }
 
-static void I386OP(mov_m8_al)(i386_state *cpustate)         // Opcode 0xa2
+static void __FASTCALL I386OP(mov_m8_al)(i386_state *cpustate)         // Opcode 0xa2
 {
        UINT32 offset, ea;
        if( cpustate->address_size ) {
@@ -994,7 +994,7 @@ static void I386OP(mov_m8_al)(i386_state *cpustate)         // Opcode 0xa2
        CYCLES(cpustate,CYCLES_MOV_MEM_ACC);
 }
 
-static void I386OP(mov_rm16_sreg)(i386_state *cpustate)     // Opcode 0x8c
+static void __FASTCALL I386OP(mov_rm16_sreg)(i386_state *cpustate)     // Opcode 0x8c
 {
        UINT8 modrm = FETCH(cpustate);
        int s = (modrm >> 3) & 0x7;
@@ -1012,7 +1012,7 @@ static void I386OP(mov_rm16_sreg)(i386_state *cpustate)     // Opcode 0x8c
        }
 }
 
-static void I386OP(mov_sreg_rm16)(i386_state *cpustate)     // Opcode 0x8e
+static void __FASTCALL I386OP(mov_sreg_rm16)(i386_state *cpustate)     // Opcode 0x8e
 {
        UINT16 selector;
        UINT8 modrm = FETCH(cpustate);
@@ -1039,55 +1039,55 @@ static void I386OP(mov_sreg_rm16)(i386_state *cpustate)     // Opcode 0x8e
        }
 }
 
-static void I386OP(mov_al_i8)(i386_state *cpustate)         // Opcode 0xb0
+static void __FASTCALL I386OP(mov_al_i8)(i386_state *cpustate)         // Opcode 0xb0
 {
        REG8(AL) = FETCH(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_cl_i8)(i386_state *cpustate)         // Opcode 0xb1
+static void __FASTCALL I386OP(mov_cl_i8)(i386_state *cpustate)         // Opcode 0xb1
 {
        REG8(CL) = FETCH(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_dl_i8)(i386_state *cpustate)         // Opcode 0xb2
+static void __FASTCALL I386OP(mov_dl_i8)(i386_state *cpustate)         // Opcode 0xb2
 {
        REG8(DL) = FETCH(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_bl_i8)(i386_state *cpustate)         // Opcode 0xb3
+static void __FASTCALL I386OP(mov_bl_i8)(i386_state *cpustate)         // Opcode 0xb3
 {
        REG8(BL) = FETCH(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_ah_i8)(i386_state *cpustate)         // Opcode 0xb4
+static void __FASTCALL I386OP(mov_ah_i8)(i386_state *cpustate)         // Opcode 0xb4
 {
        REG8(AH) = FETCH(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_ch_i8)(i386_state *cpustate)         // Opcode 0xb5
+static void __FASTCALL I386OP(mov_ch_i8)(i386_state *cpustate)         // Opcode 0xb5
 {
        REG8(CH) = FETCH(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_dh_i8)(i386_state *cpustate)         // Opcode 0xb6
+static void __FASTCALL I386OP(mov_dh_i8)(i386_state *cpustate)         // Opcode 0xb6
 {
        REG8(DH) = FETCH(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(mov_bh_i8)(i386_state *cpustate)         // Opcode 0xb7
+static void __FASTCALL I386OP(mov_bh_i8)(i386_state *cpustate)         // Opcode 0xb7
 {
        REG8(BH) = FETCH(cpustate);
        CYCLES(cpustate,CYCLES_MOV_IMM_REG);
 }
 
-static void I386OP(movsb)(i386_state *cpustate)             // Opcode 0xa4
+static void __FASTCALL I386OP(movsb)(i386_state *cpustate)             // Opcode 0xa4
 {
        UINT32 eas, ead;
        UINT8 v;
@@ -1104,7 +1104,7 @@ static void I386OP(movsb)(i386_state *cpustate)             // Opcode 0xa4
        CYCLES(cpustate,CYCLES_MOVS);
 }
 
-static void I386OP(or_rm8_r8)(i386_state *cpustate)         // Opcode 0x08
+static void __FASTCALL I386OP(or_rm8_r8)(i386_state *cpustate)         // Opcode 0x08
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1124,7 +1124,7 @@ static void I386OP(or_rm8_r8)(i386_state *cpustate)         // Opcode 0x08
        }
 }
 
-static void I386OP(or_r8_rm8)(i386_state *cpustate)         // Opcode 0x0a
+static void __FASTCALL I386OP(or_r8_rm8)(i386_state *cpustate)         // Opcode 0x0a
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1144,7 +1144,7 @@ static void I386OP(or_r8_rm8)(i386_state *cpustate)         // Opcode 0x0a
        }
 }
 
-static void I386OP(or_al_i8)(i386_state *cpustate)          // Opcode 0x0c
+static void __FASTCALL I386OP(or_al_i8)(i386_state *cpustate)          // Opcode 0x0c
 {
        UINT8 src, dst;
        src = FETCH(cpustate);
@@ -1154,7 +1154,7 @@ static void I386OP(or_al_i8)(i386_state *cpustate)          // Opcode 0x0c
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(out_al_i8)(i386_state *cpustate)         // Opcode 0xe6
+static void __FASTCALL I386OP(out_al_i8)(i386_state *cpustate)         // Opcode 0xe6
 {
        UINT16 port = FETCH(cpustate);
        UINT8 data = REG8(AL);
@@ -1162,7 +1162,7 @@ static void I386OP(out_al_i8)(i386_state *cpustate)         // Opcode 0xe6
        CYCLES(cpustate,CYCLES_OUT_VAR);
 }
 
-static void I386OP(out_al_dx)(i386_state *cpustate)         // Opcode 0xee
+static void __FASTCALL I386OP(out_al_dx)(i386_state *cpustate)         // Opcode 0xee
 {
        UINT16 port = REG16(DX);
        UINT8 data = REG8(AL);
@@ -1171,7 +1171,7 @@ static void I386OP(out_al_dx)(i386_state *cpustate)         // Opcode 0xee
 }
 
 
-static void I386OP(arpl)(i386_state *cpustate)           // Opcode 0x63
+static void __FASTCALL I386OP(arpl)(i386_state *cpustate)           // Opcode 0x63
 {
        UINT16 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1205,14 +1205,14 @@ static void I386OP(arpl)(i386_state *cpustate)           // Opcode 0x63
        }
 }
 
-static void I386OP(push_i8)(i386_state *cpustate)           // Opcode 0x6a
+static void __FASTCALL I386OP(push_i8)(i386_state *cpustate)           // Opcode 0x6a
 {
        UINT8 value = FETCH(cpustate);
        PUSH8(cpustate,value);
        CYCLES(cpustate,CYCLES_PUSH_IMM);
 }
 
-static void I386OP(ins_generic)(i386_state *cpustate, int size)
+static void __FASTCALL I386OP(ins_generic)(i386_state *cpustate, int size)
 {
        UINT32 ead;
        UINT8 vb;
@@ -1243,22 +1243,22 @@ static void I386OP(ins_generic)(i386_state *cpustate, int size)
        CYCLES(cpustate,CYCLES_INS);    // TODO: Confirm this value
 }
 
-static void I386OP(insb)(i386_state *cpustate)              // Opcode 0x6c
+static void __FASTCALL I386OP(insb)(i386_state *cpustate)              // Opcode 0x6c
 {
        I386OP(ins_generic)(cpustate, 1);
 }
 
-static void I386OP(insw)(i386_state *cpustate)              // Opcode 0x6d
+static void __FASTCALL I386OP(insw)(i386_state *cpustate)              // Opcode 0x6d
 {
        I386OP(ins_generic)(cpustate, 2);
 }
 
-static void I386OP(insd)(i386_state *cpustate)              // Opcode 0x6d
+static void __FASTCALL I386OP(insd)(i386_state *cpustate)              // Opcode 0x6d
 {
        I386OP(ins_generic)(cpustate, 4);
 }
 
-static void I386OP(outs_generic)(i386_state *cpustate, int size)
+static void __FASTCALL I386OP(outs_generic)(i386_state *cpustate, int size)
 {
        UINT32 eas;
        UINT8 vb;
@@ -1293,22 +1293,22 @@ static void I386OP(outs_generic)(i386_state *cpustate, int size)
        CYCLES(cpustate,CYCLES_OUTS);   // TODO: Confirm this value
 }
 
-static void I386OP(outsb)(i386_state *cpustate)             // Opcode 0x6e
+static void __FASTCALL I386OP(outsb)(i386_state *cpustate)             // Opcode 0x6e
 {
        I386OP(outs_generic)(cpustate, 1);
 }
 
-static void I386OP(outsw)(i386_state *cpustate)             // Opcode 0x6f
+static void __FASTCALL I386OP(outsw)(i386_state *cpustate)             // Opcode 0x6f
 {
        I386OP(outs_generic)(cpustate, 2);
 }
 
-static void I386OP(outsd)(i386_state *cpustate)             // Opcode 0x6f
+static void __FASTCALL I386OP(outsd)(i386_state *cpustate)             // Opcode 0x6f
 {
        I386OP(outs_generic)(cpustate, 4);
 }
 
-static void I386OP(repeat)(i386_state *cpustate, int invert_flag)
+static void __FASTCALL I386OP(repeat)(i386_state *cpustate, int invert_flag)
 {
        UINT32 repeated_eip = cpustate->eip;
        UINT32 repeated_pc = cpustate->pc;
@@ -1500,23 +1500,23 @@ outofcycles:
        CYCLES_NUM(-cycle_base);
 }
 
-static void I386OP(rep)(i386_state *cpustate)               // Opcode 0xf3
+static void __FASTCALL I386OP(rep)(i386_state *cpustate)               // Opcode 0xf3
 {
        I386OP(repeat)(cpustate, 0);
 }
 
-static void I386OP(repne)(i386_state *cpustate)             // Opcode 0xf2
+static void __FASTCALL I386OP(repne)(i386_state *cpustate)             // Opcode 0xf2
 {
        I386OP(repeat)(cpustate, 1);
 }
 
-static void I386OP(sahf)(i386_state *cpustate)              // Opcode 0x9e
+static void __FASTCALL I386OP(sahf)(i386_state *cpustate)              // Opcode 0x9e
 {
        set_flags(cpustate, (get_flags(cpustate) & 0xffffff00) | (REG8(AH) & 0xd7) );
        CYCLES(cpustate,CYCLES_SAHF);
 }
 
-static void I386OP(sbb_rm8_r8)(i386_state *cpustate)        // Opcode 0x18
+static void __FASTCALL I386OP(sbb_rm8_r8)(i386_state *cpustate)        // Opcode 0x18
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1536,7 +1536,7 @@ static void I386OP(sbb_rm8_r8)(i386_state *cpustate)        // Opcode 0x18
        }
 }
 
-static void I386OP(sbb_r8_rm8)(i386_state *cpustate)        // Opcode 0x1a
+static void __FASTCALL I386OP(sbb_r8_rm8)(i386_state *cpustate)        // Opcode 0x1a
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1556,7 +1556,7 @@ static void I386OP(sbb_r8_rm8)(i386_state *cpustate)        // Opcode 0x1a
        }
 }
 
-static void I386OP(sbb_al_i8)(i386_state *cpustate)         // Opcode 0x1c
+static void __FASTCALL I386OP(sbb_al_i8)(i386_state *cpustate)         // Opcode 0x1c
 {
        UINT8 src, dst;
        src = FETCH(cpustate);
@@ -1566,7 +1566,7 @@ static void I386OP(sbb_al_i8)(i386_state *cpustate)         // Opcode 0x1c
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(scasb)(i386_state *cpustate)             // Opcode 0xae
+static void __FASTCALL I386OP(scasb)(i386_state *cpustate)             // Opcode 0xae
 {
        UINT32 eas;
        UINT8 src, dst;
@@ -1578,7 +1578,7 @@ static void I386OP(scasb)(i386_state *cpustate)             // Opcode 0xae
        CYCLES(cpustate,CYCLES_SCAS);
 }
 
-static void I386OP(setalc)(i386_state *cpustate)            // Opcode 0xd6 (undocumented)
+static void __FASTCALL I386OP(setalc)(i386_state *cpustate)            // Opcode 0xd6 (undocumented)
 {
        if( cpustate->CF ) {
                REG8(AL) = 0xff;
@@ -1588,7 +1588,7 @@ static void I386OP(setalc)(i386_state *cpustate)            // Opcode 0xd6 (undo
        CYCLES(cpustate,3);
 }
 
-static void I386OP(seta_rm8)(i386_state *cpustate)          // Opcode 0x0f 97
+static void __FASTCALL I386OP(seta_rm8)(i386_state *cpustate)          // Opcode 0x0f 97
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1605,7 +1605,7 @@ static void I386OP(seta_rm8)(i386_state *cpustate)          // Opcode 0x0f 97
        }
 }
 
-static void I386OP(setbe_rm8)(i386_state *cpustate)         // Opcode 0x0f 96
+static void __FASTCALL I386OP(setbe_rm8)(i386_state *cpustate)         // Opcode 0x0f 96
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1622,7 +1622,7 @@ static void I386OP(setbe_rm8)(i386_state *cpustate)         // Opcode 0x0f 96
        }
 }
 
-static void I386OP(setc_rm8)(i386_state *cpustate)          // Opcode 0x0f 92
+static void __FASTCALL I386OP(setc_rm8)(i386_state *cpustate)          // Opcode 0x0f 92
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1639,7 +1639,7 @@ static void I386OP(setc_rm8)(i386_state *cpustate)          // Opcode 0x0f 92
        }
 }
 
-static void I386OP(setg_rm8)(i386_state *cpustate)          // Opcode 0x0f 9f
+static void __FASTCALL I386OP(setg_rm8)(i386_state *cpustate)          // Opcode 0x0f 9f
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1656,7 +1656,7 @@ static void I386OP(setg_rm8)(i386_state *cpustate)          // Opcode 0x0f 9f
        }
 }
 
-static void I386OP(setge_rm8)(i386_state *cpustate)         // Opcode 0x0f 9d
+static void __FASTCALL I386OP(setge_rm8)(i386_state *cpustate)         // Opcode 0x0f 9d
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1673,7 +1673,7 @@ static void I386OP(setge_rm8)(i386_state *cpustate)         // Opcode 0x0f 9d
        }
 }
 
-static void I386OP(setl_rm8)(i386_state *cpustate)          // Opcode 0x0f 9c
+static void __FASTCALL I386OP(setl_rm8)(i386_state *cpustate)          // Opcode 0x0f 9c
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1690,7 +1690,7 @@ static void I386OP(setl_rm8)(i386_state *cpustate)          // Opcode 0x0f 9c
        }
 }
 
-static void I386OP(setle_rm8)(i386_state *cpustate)         // Opcode 0x0f 9e
+static void __FASTCALL I386OP(setle_rm8)(i386_state *cpustate)         // Opcode 0x0f 9e
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1707,7 +1707,7 @@ static void I386OP(setle_rm8)(i386_state *cpustate)         // Opcode 0x0f 9e
        }
 }
 
-static void I386OP(setnc_rm8)(i386_state *cpustate)         // Opcode 0x0f 93
+static void __FASTCALL I386OP(setnc_rm8)(i386_state *cpustate)         // Opcode 0x0f 93
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1724,7 +1724,7 @@ static void I386OP(setnc_rm8)(i386_state *cpustate)         // Opcode 0x0f 93
        }
 }
 
-static void I386OP(setno_rm8)(i386_state *cpustate)         // Opcode 0x0f 91
+static void __FASTCALL I386OP(setno_rm8)(i386_state *cpustate)         // Opcode 0x0f 91
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1741,7 +1741,7 @@ static void I386OP(setno_rm8)(i386_state *cpustate)         // Opcode 0x0f 91
        }
 }
 
-static void I386OP(setnp_rm8)(i386_state *cpustate)         // Opcode 0x0f 9b
+static void __FASTCALL I386OP(setnp_rm8)(i386_state *cpustate)         // Opcode 0x0f 9b
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1758,7 +1758,7 @@ static void I386OP(setnp_rm8)(i386_state *cpustate)         // Opcode 0x0f 9b
        }
 }
 
-static void I386OP(setns_rm8)(i386_state *cpustate)         // Opcode 0x0f 99
+static void __FASTCALL I386OP(setns_rm8)(i386_state *cpustate)         // Opcode 0x0f 99
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1775,7 +1775,7 @@ static void I386OP(setns_rm8)(i386_state *cpustate)         // Opcode 0x0f 99
        }
 }
 
-static void I386OP(setnz_rm8)(i386_state *cpustate)         // Opcode 0x0f 95
+static void __FASTCALL I386OP(setnz_rm8)(i386_state *cpustate)         // Opcode 0x0f 95
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1792,7 +1792,7 @@ static void I386OP(setnz_rm8)(i386_state *cpustate)         // Opcode 0x0f 95
        }
 }
 
-static void I386OP(seto_rm8)(i386_state *cpustate)          // Opcode 0x0f 90
+static void __FASTCALL I386OP(seto_rm8)(i386_state *cpustate)          // Opcode 0x0f 90
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1809,7 +1809,7 @@ static void I386OP(seto_rm8)(i386_state *cpustate)          // Opcode 0x0f 90
        }
 }
 
-static void I386OP(setp_rm8)(i386_state *cpustate)          // Opcode 0x0f 9a
+static void __FASTCALL I386OP(setp_rm8)(i386_state *cpustate)          // Opcode 0x0f 9a
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1826,7 +1826,7 @@ static void I386OP(setp_rm8)(i386_state *cpustate)          // Opcode 0x0f 9a
        }
 }
 
-static void I386OP(sets_rm8)(i386_state *cpustate)          // Opcode 0x0f 98
+static void __FASTCALL I386OP(sets_rm8)(i386_state *cpustate)          // Opcode 0x0f 98
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1843,7 +1843,7 @@ static void I386OP(sets_rm8)(i386_state *cpustate)          // Opcode 0x0f 98
        }
 }
 
-static void I386OP(setz_rm8)(i386_state *cpustate)          // Opcode 0x0f 94
+static void __FASTCALL I386OP(setz_rm8)(i386_state *cpustate)          // Opcode 0x0f 94
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 value = 0;
@@ -1860,19 +1860,19 @@ static void I386OP(setz_rm8)(i386_state *cpustate)          // Opcode 0x0f 94
        }
 }
 
-static void I386OP(stc)(i386_state *cpustate)               // Opcode 0xf9
+static void __FASTCALL I386OP(stc)(i386_state *cpustate)               // Opcode 0xf9
 {
        cpustate->CF = 1;
        CYCLES(cpustate,CYCLES_STC);
 }
 
-static void I386OP(std)(i386_state *cpustate)               // Opcode 0xfd
+static void __FASTCALL I386OP(std)(i386_state *cpustate)               // Opcode 0xfd
 {
        cpustate->DF = 1;
        CYCLES(cpustate,CYCLES_STD);
 }
 
-static void I386OP(sti)(i386_state *cpustate)               // Opcode 0xfb
+static void __FASTCALL I386OP(sti)(i386_state *cpustate)               // Opcode 0xfb
 {
        if(PROTECTED_MODE)
        {
@@ -1887,7 +1887,7 @@ static void I386OP(sti)(i386_state *cpustate)               // Opcode 0xfb
        CYCLES(cpustate,CYCLES_STI);
 }
 
-static void I386OP(stosb)(i386_state *cpustate)             // Opcode 0xaa
+static void __FASTCALL I386OP(stosb)(i386_state *cpustate)             // Opcode 0xaa
 {
        UINT32 ead;
        ead = i386_translate(cpustate, ES, cpustate->address_size ? REG32(EDI) : REG16(DI), 1, 1 );
@@ -1896,7 +1896,7 @@ static void I386OP(stosb)(i386_state *cpustate)             // Opcode 0xaa
        CYCLES(cpustate,CYCLES_STOS);
 }
 
-static void I386OP(sub_rm8_r8)(i386_state *cpustate)        // Opcode 0x28
+static void __FASTCALL I386OP(sub_rm8_r8)(i386_state *cpustate)        // Opcode 0x28
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1916,7 +1916,7 @@ static void I386OP(sub_rm8_r8)(i386_state *cpustate)        // Opcode 0x28
        }
 }
 
-static void I386OP(sub_r8_rm8)(i386_state *cpustate)        // Opcode 0x2a
+static void __FASTCALL I386OP(sub_r8_rm8)(i386_state *cpustate)        // Opcode 0x2a
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1936,7 +1936,7 @@ static void I386OP(sub_r8_rm8)(i386_state *cpustate)        // Opcode 0x2a
        }
 }
 
-static void I386OP(sub_al_i8)(i386_state *cpustate)         // Opcode 0x2c
+static void __FASTCALL I386OP(sub_al_i8)(i386_state *cpustate)         // Opcode 0x2c
 {
        UINT8 src, dst;
        src = FETCH(cpustate);
@@ -1946,7 +1946,7 @@ static void I386OP(sub_al_i8)(i386_state *cpustate)         // Opcode 0x2c
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(test_al_i8)(i386_state *cpustate)        // Opcode 0xa8
+static void __FASTCALL I386OP(test_al_i8)(i386_state *cpustate)        // Opcode 0xa8
 {
        UINT8 src = FETCH(cpustate);
        UINT8 dst = REG8(AL);
@@ -1957,7 +1957,7 @@ static void I386OP(test_al_i8)(i386_state *cpustate)        // Opcode 0xa8
        CYCLES(cpustate,CYCLES_ALU_IMM_ACC);
 }
 
-static void I386OP(test_rm8_r8)(i386_state *cpustate)       // Opcode 0x84
+static void __FASTCALL I386OP(test_rm8_r8)(i386_state *cpustate)       // Opcode 0x84
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -1981,7 +1981,7 @@ static void I386OP(test_rm8_r8)(i386_state *cpustate)       // Opcode 0x84
        }
 }
 
-static void I386OP(xchg_r8_rm8)(i386_state *cpustate)       // Opcode 0x86
+static void __FASTCALL I386OP(xchg_r8_rm8)(i386_state *cpustate)       // Opcode 0x86
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2000,7 +2000,7 @@ static void I386OP(xchg_r8_rm8)(i386_state *cpustate)       // Opcode 0x86
        }
 }
 
-static void I386OP(xor_rm8_r8)(i386_state *cpustate)        // Opcode 0x30
+static void __FASTCALL I386OP(xor_rm8_r8)(i386_state *cpustate)        // Opcode 0x30
 {
        UINT8 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2020,7 +2020,7 @@ static void I386OP(xor_rm8_r8)(i386_state *cpustate)        // Opcode 0x30
        }
 }
 
-static void I386OP(xor_r8_rm8)(i386_state *cpustate)        // Opcode 0x32
+static void __FASTCALL I386OP(xor_r8_rm8)(i386_state *cpustate)        // Opcode 0x32
 {
        UINT32 src, dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2040,7 +2040,7 @@ static void I386OP(xor_r8_rm8)(i386_state *cpustate)        // Opcode 0x32
        }
 }
 
-static void I386OP(xor_al_i8)(i386_state *cpustate)         // Opcode 0x34
+static void __FASTCALL I386OP(xor_al_i8)(i386_state *cpustate)         // Opcode 0x34
 {
        UINT8 src, dst;
        src = FETCH(cpustate);
@@ -2052,7 +2052,7 @@ static void I386OP(xor_al_i8)(i386_state *cpustate)         // Opcode 0x34
 
 
 
-static void I386OP(group80_8)(i386_state *cpustate)         // Opcode 0x80
+static void __FASTCALL I386OP(group80_8)(i386_state *cpustate)         // Opcode 0x80
 {
        UINT32 ea;
        UINT8 src, dst;
@@ -2189,7 +2189,7 @@ static void I386OP(group80_8)(i386_state *cpustate)         // Opcode 0x80
        }
 }
 
-static void I386OP(groupC0_8)(i386_state *cpustate)         // Opcode 0xc0
+static void __FASTCALL I386OP(groupC0_8)(i386_state *cpustate)         // Opcode 0xc0
 {
        UINT8 dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2209,7 +2209,7 @@ static void I386OP(groupC0_8)(i386_state *cpustate)         // Opcode 0xc0
        }
 }
 
-static void I386OP(groupD0_8)(i386_state *cpustate)         // Opcode 0xd0
+static void __FASTCALL I386OP(groupD0_8)(i386_state *cpustate)         // Opcode 0xd0
 {
        UINT8 dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2226,7 +2226,7 @@ static void I386OP(groupD0_8)(i386_state *cpustate)         // Opcode 0xd0
        }
 }
 
-static void I386OP(groupD2_8)(i386_state *cpustate)         // Opcode 0xd2
+static void __FASTCALL I386OP(groupD2_8)(i386_state *cpustate)         // Opcode 0xd2
 {
        UINT8 dst;
        UINT8 modrm = FETCH(cpustate);
@@ -2243,7 +2243,7 @@ static void I386OP(groupD2_8)(i386_state *cpustate)         // Opcode 0xd2
        }
 }
 
-static void I386OP(groupF6_8)(i386_state *cpustate)         // Opcode 0xf6
+static void __FASTCALL I386OP(groupF6_8)(i386_state *cpustate)         // Opcode 0xf6
 {
        UINT8 modrm = FETCH(cpustate);
 
@@ -2405,7 +2405,7 @@ static void I386OP(groupF6_8)(i386_state *cpustate)         // Opcode 0xf6
        }
 }
 
-static void I386OP(groupFE_8)(i386_state *cpustate)         // Opcode 0xfe
+static void __FASTCALL I386OP(groupFE_8)(i386_state *cpustate)         // Opcode 0xfe
 {
        UINT8 modrm = FETCH(cpustate);
 
@@ -2464,7 +2464,7 @@ static void I386OP(groupFE_8)(i386_state *cpustate)         // Opcode 0xfe
 
 
 
-static void I386OP(segment_CS)(i386_state *cpustate)        // Opcode 0x2e
+static void __FASTCALL I386OP(segment_CS)(i386_state *cpustate)        // Opcode 0x2e
 {
        cpustate->segment_prefix = 1;
        cpustate->segment_override = CS;
@@ -2472,7 +2472,7 @@ static void I386OP(segment_CS)(i386_state *cpustate)        // Opcode 0x2e
        I386OP(decode_opcode)(cpustate);
 }
 
-static void I386OP(segment_DS)(i386_state *cpustate)        // Opcode 0x3e
+static void __FASTCALL I386OP(segment_DS)(i386_state *cpustate)        // Opcode 0x3e
 {
        cpustate->segment_prefix = 1;
        cpustate->segment_override = DS;
@@ -2480,7 +2480,7 @@ static void I386OP(segment_DS)(i386_state *cpustate)        // Opcode 0x3e
        I386OP(decode_opcode)(cpustate);
 }
 
-static void I386OP(segment_ES)(i386_state *cpustate)        // Opcode 0x26
+static void __FASTCALL I386OP(segment_ES)(i386_state *cpustate)        // Opcode 0x26
 {
        cpustate->segment_prefix = 1;
        cpustate->segment_override = ES;
@@ -2488,7 +2488,7 @@ static void I386OP(segment_ES)(i386_state *cpustate)        // Opcode 0x26
        I386OP(decode_opcode)(cpustate);
 }
 
-static void I386OP(segment_FS)(i386_state *cpustate)        // Opcode 0x64
+static void __FASTCALL I386OP(segment_FS)(i386_state *cpustate)        // Opcode 0x64
 {
        cpustate->segment_prefix = 1;
        cpustate->segment_override = FS;
@@ -2496,7 +2496,7 @@ static void I386OP(segment_FS)(i386_state *cpustate)        // Opcode 0x64
        I386OP(decode_opcode)(cpustate);
 }
 
-static void I386OP(segment_GS)(i386_state *cpustate)        // Opcode 0x65
+static void __FASTCALL I386OP(segment_GS)(i386_state *cpustate)        // Opcode 0x65
 {
        cpustate->segment_prefix = 1;
        cpustate->segment_override = GS;
@@ -2504,7 +2504,7 @@ static void I386OP(segment_GS)(i386_state *cpustate)        // Opcode 0x65
        I386OP(decode_opcode)(cpustate);
 }
 
-static void I386OP(segment_SS)(i386_state *cpustate)        // Opcode 0x36
+static void __FASTCALL I386OP(segment_SS)(i386_state *cpustate)        // Opcode 0x36
 {
        cpustate->segment_prefix = 1;
        cpustate->segment_override = SS;
@@ -2512,7 +2512,7 @@ static void I386OP(segment_SS)(i386_state *cpustate)        // Opcode 0x36
        I386OP(decode_opcode)(cpustate);
 }
 
-static void I386OP(operand_size)(i386_state *cpustate)      // Opcode prefix 0x66
+static void __FASTCALL I386OP(operand_size)(i386_state *cpustate)      // Opcode prefix 0x66
 {
        if(cpustate->operand_prefix == 0)
        {
@@ -2532,7 +2532,7 @@ static void I386OP(operand_size)(i386_state *cpustate)      // Opcode prefix 0x6
        }
 }
 
-static void I386OP(address_size)(i386_state *cpustate)      // Opcode 0x67
+static void __FASTCALL I386OP(address_size)(i386_state *cpustate)      // Opcode 0x67
 {
        if(cpustate->address_prefix == 0)
        {
@@ -2542,12 +2542,12 @@ static void I386OP(address_size)(i386_state *cpustate)      // Opcode 0x67
        I386OP(decode_opcode)(cpustate);
 }
 
-static void I386OP(nop)(i386_state *cpustate)               // Opcode 0x90
+static void __FASTCALL I386OP(nop)(i386_state *cpustate)               // Opcode 0x90
 {
        CYCLES(cpustate,CYCLES_NOP);
 }
 
-static void I386OP(int3)(i386_state *cpustate)              // Opcode 0xcc
+static void __FASTCALL I386OP(int3)(i386_state *cpustate)              // Opcode 0xcc
 {
        CYCLES(cpustate,CYCLES_INT3);
        logerror("INT3 at %08X\n", cpustate->pc - 1);
@@ -2556,7 +2556,7 @@ static void I386OP(int3)(i386_state *cpustate)              // Opcode 0xcc
        cpustate->ext = 1;
 }
 
-static void I386OP(int_16)(i386_state *cpustate)               // Opcode 0xcd
+static void __FASTCALL I386OP(int_16)(i386_state *cpustate)               // Opcode 0xcd
 {
        int interrupt = FETCH(cpustate);
        CYCLES(cpustate,CYCLES_INT);
@@ -2568,7 +2568,7 @@ static void I386OP(int_16)(i386_state *cpustate)               // Opcode 0xcd
        cpustate->ext = 1;
 }
 
-static void I386OP(int_32)(i386_state *cpustate)               // Opcode 0xcd
+static void __FASTCALL I386OP(int_32)(i386_state *cpustate)               // Opcode 0xcd
 {
        int interrupt = FETCH(cpustate);
        CYCLES(cpustate,CYCLES_INT);
@@ -2580,7 +2580,7 @@ static void I386OP(int_32)(i386_state *cpustate)               // Opcode 0xcd
        cpustate->ext = 1;
 }
 
-static void I386OP(into)(i386_state *cpustate)              // Opcode 0xce
+static void __FASTCALL I386OP(into)(i386_state *cpustate)              // Opcode 0xce
 {
        if( cpustate->OF ) {
                cpustate->ext = 0;
@@ -2595,7 +2595,7 @@ static void I386OP(into)(i386_state *cpustate)              // Opcode 0xce
 }
 
 static UINT32 i386_escape_ea;   // hack around GCC 4.6 error because we need the side effects of GetEA()
-static void I386OP(escape)(i386_state *cpustate)            // Opcodes 0xd8 - 0xdf
+static void __FASTCALL I386OP(escape)(i386_state *cpustate)            // Opcodes 0xd8 - 0xdf
 {
        UINT8 modrm = FETCH(cpustate);
        if(modrm < 0xc0)
@@ -2606,7 +2606,7 @@ static void I386OP(escape)(i386_state *cpustate)            // Opcodes 0xd8 - 0x
        (void) LOAD_RM8(modrm);
 }
 
-static void I386OP(hlt)(i386_state *cpustate)               // Opcode 0xf4
+static void __FASTCALL I386OP(hlt)(i386_state *cpustate)               // Opcode 0xf4
 {
        if(PROTECTED_MODE && cpustate->CPL != 0) {
                logerror("Call from no-supervisor privilege: I386OP(hlt)");
@@ -2621,7 +2621,7 @@ static void I386OP(hlt)(i386_state *cpustate)               // Opcode 0xf4
                cpustate->cycles = 0;
 }
 
-static void I386OP(decimal_adjust)(i386_state *cpustate, int direction)
+static void __FASTCALL I386OP(decimal_adjust)(i386_state *cpustate, int direction)
 {
        UINT8 tmpAL = REG8(AL);
        UINT8 tmpCF = cpustate->CF;
@@ -2646,19 +2646,19 @@ static void I386OP(decimal_adjust)(i386_state *cpustate, int direction)
        SetSZPF8(REG8(AL));
 }
 
-static void I386OP(daa)(i386_state *cpustate)               // Opcode 0x27
+static void __FASTCALL I386OP(daa)(i386_state *cpustate)               // Opcode 0x27
 {
        I386OP(decimal_adjust)(cpustate, +1);
        CYCLES(cpustate,CYCLES_DAA);
 }
 
-static void I386OP(das)(i386_state *cpustate)               // Opcode 0x2f
+static void __FASTCALL I386OP(das)(i386_state *cpustate)               // Opcode 0x2f
 {
        I386OP(decimal_adjust)(cpustate, -1);
        CYCLES(cpustate,CYCLES_DAS);
 }
 
-static void I386OP(aaa)(i386_state *cpustate)               // Opcode 0x37
+static void __FASTCALL I386OP(aaa)(i386_state *cpustate)               // Opcode 0x37
 {
        if( ( (REG8(AL) & 0x0f) > 9) || (cpustate->AF != 0) ) {
                REG16(AX) = REG16(AX) + 6;
@@ -2673,7 +2673,7 @@ static void I386OP(aaa)(i386_state *cpustate)               // Opcode 0x37
        CYCLES(cpustate,CYCLES_AAA);
 }
 
-static void I386OP(aas)(i386_state *cpustate)               // Opcode 0x3f
+static void __FASTCALL I386OP(aas)(i386_state *cpustate)               // Opcode 0x3f
 {
        if (cpustate->AF || ((REG8(AL) & 0xf) > 9))
        {
@@ -2691,7 +2691,7 @@ static void I386OP(aas)(i386_state *cpustate)               // Opcode 0x3f
        CYCLES(cpustate,CYCLES_AAS);
 }
 
-static void I386OP(aad)(i386_state *cpustate)               // Opcode 0xd5
+static void __FASTCALL I386OP(aad)(i386_state *cpustate)               // Opcode 0xd5
 {
        UINT8 tempAL = REG8(AL);
        UINT8 tempAH = REG8(AH);
@@ -2703,7 +2703,7 @@ static void I386OP(aad)(i386_state *cpustate)               // Opcode 0xd5
        CYCLES(cpustate,CYCLES_AAD);
 }
 
-static void I386OP(aam)(i386_state *cpustate)               // Opcode 0xd4
+static void __FASTCALL I386OP(aam)(i386_state *cpustate)               // Opcode 0xd4
 {
        UINT8 tempAL = REG8(AL);
        UINT8 i = FETCH(cpustate);
@@ -2720,7 +2720,7 @@ static void I386OP(aam)(i386_state *cpustate)               // Opcode 0xd4
        CYCLES(cpustate,CYCLES_AAM);
 }
 
-static void I386OP(clts)(i386_state *cpustate)              // Opcode 0x0f 0x06
+static void __FASTCALL I386OP(clts)(i386_state *cpustate)              // Opcode 0x0f 0x06
 {
        // Privileged instruction, CPL must be zero.  Can be used in real or v86 mode.
        if(PROTECTED_MODE && cpustate->CPL != 0) {
@@ -2732,12 +2732,12 @@ static void I386OP(clts)(i386_state *cpustate)              // Opcode 0x0f 0x06
        CYCLES(cpustate,CYCLES_CLTS);
 }
 
-static void I386OP(wait)(i386_state *cpustate)              // Opcode 0x9B
+static void __FASTCALL I386OP(wait)(i386_state *cpustate)              // Opcode 0x9B
 {
        // TODO
 }
 
-static void I386OP(lock)(i386_state *cpustate)              // Opcode 0xf0
+static void __FASTCALL I386OP(lock)(i386_state *cpustate)              // Opcode 0xf0
 {
        // lock doesn't depend on iopl on 386
        cpustate->lock = true;
@@ -2745,19 +2745,19 @@ static void I386OP(lock)(i386_state *cpustate)              // Opcode 0xf0
        I386OP(decode_opcode)(cpustate);
 }
 
-static void I386OP(mov_r32_tr)(i386_state *cpustate)        // Opcode 0x0f 24
+static void __FASTCALL I386OP(mov_r32_tr)(i386_state *cpustate)        // Opcode 0x0f 24
 {
        FETCH(cpustate);
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void I386OP(mov_tr_r32)(i386_state *cpustate)        // Opcode 0x0f 26
+static void __FASTCALL I386OP(mov_tr_r32)(i386_state *cpustate)        // Opcode 0x0f 26
 {
        FETCH(cpustate);
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void I386OP(loadall)(i386_state *cpustate)       // Opcode 0x0f 0x07 (0x0f 0x05 on 80286), undocumented
+static void __FASTCALL I386OP(loadall)(i386_state *cpustate)       // Opcode 0x0f 0x07 (0x0f 0x05 on 80286), undocumented
 {
        if(PROTECTED_MODE && (cpustate->CPL != 0)) {
                logerror("Call from non-supervisor privilege: I386OP(loadall)"); 
@@ -2825,13 +2825,13 @@ static void I386OP(loadall)(i386_state *cpustate)       // Opcode 0x0f 0x07 (0x0
        CHANGE_PC(cpustate, cpustate->eip);
 }
 
-static void I386OP(invalid)(i386_state *cpustate)
+static void __FASTCALL I386OP(invalid)(i386_state *cpustate)
 {
        report_invalid_opcode(cpustate);
        i386_trap(cpustate, 6, 0, 0);
 }
 
-static void I386OP(xlat)(i386_state *cpustate)          // Opcode 0xd7
+static void __FASTCALL I386OP(xlat)(i386_state *cpustate)          // Opcode 0xd7
 {
        UINT32 ea;
        if( cpustate->segment_prefix ) {
index cc5edbb..c27be95 100644 (file)
@@ -3,8 +3,8 @@
 struct X86_OPCODE {
        UINT8 opcode;
        UINT32 flags;
-       void (*handler16)(i386_state *cpustate);
-       void (*handler32)(i386_state *cpustate);
+       void (__FASTCALL *handler16)(i386_state *cpustate);
+       void (__FASTCALL *handler32)(i386_state *cpustate);
        bool lockable;
 };
 
index 8b7d050..58cad53 100644 (file)
@@ -313,7 +313,7 @@ enum smram_intel_p5
 #define SREG_BITSIZE_FLAGS  4
 
 // ARGS are pair32_t
-INLINE UINT32 sreg_get_base(UINT32 high, UINT32 low)
+INLINE UINT32 __FASTCALL sreg_get_base(UINT32 high, UINT32 low)
 {
        pair32_t ret;
        pair32_t phigh;
@@ -327,7 +327,7 @@ INLINE UINT32 sreg_get_base(UINT32 high, UINT32 low)
        return ret.d;
 }
        
-INLINE UINT32 sreg_get_base_from_u64(UINT64 src)
+INLINE UINT32 __FASTCALL sreg_get_base_from_u64(UINT64 src)
 {
        pair32_t ret;
        pair64_t pa;
@@ -339,7 +339,7 @@ INLINE UINT32 sreg_get_base_from_u64(UINT64 src)
        return ret.d;
 }
 
-INLINE UINT32 sreg_get_limit(UINT32 high, UINT32 low)
+INLINE UINT32 __FASTCALL sreg_get_limit(UINT32 high, UINT32 low)
 {
        pair32_t ret;
        pair32_t phigh;
@@ -352,7 +352,7 @@ INLINE UINT32 sreg_get_limit(UINT32 high, UINT32 low)
        return ret.d;
 }
 
-INLINE UINT32 sreg_get_limit_from_u64(UINT64 src)
+INLINE UINT32 __FASTCALL sreg_get_limit_from_u64(UINT64 src)
 {
        pair32_t ret;
        pair64_t pa;
@@ -363,14 +363,14 @@ INLINE UINT32 sreg_get_limit_from_u64(UINT64 src)
        return ret.d;
 }
 
-INLINE UINT8 sreg_get_flags(UINT32 high)
+INLINE UINT8 __FASTCALL sreg_get_flags(UINT32 high)
 {
        pair32_t phigh;
        phigh.d = high;
        return ((phigh.b.h2 & 0xf0) >> 4);
 }
 
-INLINE UINT8 sreg_get_flags_from_u64(UINT64 src)
+INLINE UINT8 __FASTCALL sreg_get_flags_from_u64(UINT64 src)
 {
        pair32_t ret;
        pair64_t pa;
@@ -379,14 +379,14 @@ INLINE UINT8 sreg_get_flags_from_u64(UINT64 src)
        return ((pa.b.h6 & 0xf0) >> 4);
 }
 
-INLINE UINT8 sreg_get_access_byte(UINT32 high)
+INLINE UINT8 __FASTCALL sreg_get_access_byte(UINT32 high)
 {
        pair32_t phigh;
        phigh.d = high;
        return phigh.b.h;
 }
 
-INLINE UINT8 sreg_get_access_byte_from_u64(UINT64 src)
+INLINE UINT8 __FASTCALL sreg_get_access_byte_from_u64(UINT64 src)
 {
        pair64_t pa;
 
@@ -620,43 +620,43 @@ struct i386_state
        UINT64 x87_inst_ptr;
        UINT16 x87_opcode;
 
-       void (*opcode_table_x87_d8[256])(i386_state *cpustate, UINT8 modrm);
-       void (*opcode_table_x87_d9[256])(i386_state *cpustate, UINT8 modrm);
-       void (*opcode_table_x87_da[256])(i386_state *cpustate, UINT8 modrm);
-       void (*opcode_table_x87_db[256])(i386_state *cpustate, UINT8 modrm);
-       void (*opcode_table_x87_dc[256])(i386_state *cpustate, UINT8 modrm);
-       void (*opcode_table_x87_dd[256])(i386_state *cpustate, UINT8 modrm);
-       void (*opcode_table_x87_de[256])(i386_state *cpustate, UINT8 modrm);
-       void (*opcode_table_x87_df[256])(i386_state *cpustate, UINT8 modrm);
+       void (__FASTCALL *opcode_table_x87_d8[256])(i386_state *cpustate, UINT8 modrm);
+       void (__FASTCALL *opcode_table_x87_d9[256])(i386_state *cpustate, UINT8 modrm);
+       void (__FASTCALL *opcode_table_x87_da[256])(i386_state *cpustate, UINT8 modrm);
+       void (__FASTCALL *opcode_table_x87_db[256])(i386_state *cpustate, UINT8 modrm);
+       void (__FASTCALL *opcode_table_x87_dc[256])(i386_state *cpustate, UINT8 modrm);
+       void (__FASTCALL *opcode_table_x87_dd[256])(i386_state *cpustate, UINT8 modrm);
+       void (__FASTCALL *opcode_table_x87_de[256])(i386_state *cpustate, UINT8 modrm);
+       void (__FASTCALL *opcode_table_x87_df[256])(i386_state *cpustate, UINT8 modrm);
 
        // SSE
        XMM_REG sse_reg[8];
        UINT32 mxcsr;
 
-       void (*opcode_table1_16[256])(i386_state *cpustate);
-       void (*opcode_table1_32[256])(i386_state *cpustate);
-       void (*opcode_table2_16[256])(i386_state *cpustate);
-       void (*opcode_table2_32[256])(i386_state *cpustate);
-       void (*opcode_table338_16[256])(i386_state *cpustate);
-       void (*opcode_table338_32[256])(i386_state *cpustate);
-       void (*opcode_table33a_16[256])(i386_state *cpustate);
-       void (*opcode_table33a_32[256])(i386_state *cpustate);
-       void (*opcode_table366_16[256])(i386_state *cpustate);
-       void (*opcode_table366_32[256])(i386_state *cpustate);
-       void (*opcode_table3f2_16[256])(i386_state *cpustate);
-       void (*opcode_table3f2_32[256])(i386_state *cpustate);
-       void (*opcode_table3f3_16[256])(i386_state *cpustate);
-       void (*opcode_table3f3_32[256])(i386_state *cpustate);
-       void (*opcode_table46638_16[256])(i386_state *cpustate);
-       void (*opcode_table46638_32[256])(i386_state *cpustate);
-       void (*opcode_table4f238_16[256])(i386_state *cpustate);
-       void (*opcode_table4f238_32[256])(i386_state *cpustate);
-       void (*opcode_table4f338_16[256])(i386_state *cpustate);
-       void (*opcode_table4f338_32[256])(i386_state *cpustate);
-       void (*opcode_table4663a_16[256])(i386_state *cpustate);
-       void (*opcode_table4663a_32[256])(i386_state *cpustate);
-       void (*opcode_table4f23a_16[256])(i386_state *cpustate);
-       void (*opcode_table4f23a_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table1_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table1_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table2_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table2_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table338_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table338_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table33a_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table33a_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table366_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table366_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table3f2_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table3f2_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table3f3_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table3f3_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table46638_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table46638_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table4f238_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table4f238_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table4f338_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table4f338_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table4663a_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table4663a_32[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table4f23a_16[256])(i386_state *cpustate);
+       void (__FASTCALL *opcode_table4f23a_32[256])(i386_state *cpustate);
 
        bool lock_table[2][256];
 
@@ -850,7 +850,7 @@ extern MODRM_TABLE i386_MODRM_table[256];
 
 /***********************************************************************************/
 
-INLINE int i386_limit_check(i386_state *cpustate, int seg, UINT32 offset, UINT32 size)
+INLINE int __FASTCALL i386_limit_check(i386_state *cpustate, int seg, UINT32 offset, UINT32 size)
 {
 //     size = 1; // TBD
        if(PROTECTED_MODE && !V8086_MODE)
@@ -877,7 +877,7 @@ INLINE int i386_limit_check(i386_state *cpustate, int seg, UINT32 offset, UINT32
        return 0;
 }
 
-INLINE UINT32 i386_translate(i386_state *cpustate, int segment, UINT32 ip, int rwn, UINT32 size)
+INLINE UINT32 __FASTCALL i386_translate(i386_state *cpustate, int segment, UINT32 ip, int rwn, UINT32 size)
 {
        // TODO: segment limit access size, execution permission, handle exception thrown from exception handler
        if(PROTECTED_MODE && !V8086_MODE && (rwn != -1))
@@ -896,7 +896,7 @@ INLINE UINT32 i386_translate(i386_state *cpustate, int segment, UINT32 ip, int r
 
 #define VTLB_FLAG_DIRTY 0x100
 
-vtlb_entry get_permissions(UINT32 pte, int wp)
+vtlb_entry __FASTCALL get_permissions(UINT32 pte, int wp)
 {
        vtlb_entry ret = VTLB_READ_ALLOWED | ((pte & 4) ? VTLB_USER_READ_ALLOWED : 0);
        if(!wp)
@@ -906,7 +906,7 @@ vtlb_entry get_permissions(UINT32 pte, int wp)
        return ret;
 }
 
-INLINE int i386_translate_address(i386_state *cpustate, int intention, offs_t *address, vtlb_entry *entry)
+/*INLINE*/ int __FASTCALL i386_translate_address(i386_state *cpustate, int intention, offs_t *address, vtlb_entry *entry)
 {
        UINT32 a = *address;
        UINT32 pdbr = cpustate->cr[3] & 0xfffff000;// I386_CR3_PD_MASK
@@ -994,7 +994,7 @@ INLINE int i386_translate_address(i386_state *cpustate, int intention, offs_t *a
        return ret;
 }
 
-INLINE int i386_translate_address_with_width(i386_state *cpustate, int intention, int width, offs_t *address, vtlb_entry *entry)
+/*INLINE*/ int __FASTCALL i386_translate_address_with_width(i386_state *cpustate, int intention, int width, offs_t *address, vtlb_entry *entry)
 {
        UINT32 a = *address;
        UINT32 pdbr = cpustate->cr[3] & 0xfffff000;// I386_CR3_PD_MASK
@@ -1091,7 +1091,7 @@ INLINE int i386_translate_address_with_width(i386_state *cpustate, int intention
 
 //#define TEST_TLB
 
-INLINE int translate_address(i386_state *cpustate, int pl, int type, UINT32 *address, UINT32 *error)
+/*INLINE*/ int __FASTCALL translate_address(i386_state *cpustate, int pl, int type, UINT32 *address, UINT32 *error)
 {
        if(!(cpustate->cr[0] & I386_CR0_PG)) // Some (very few) old OS's won't work with this
                return TRUE;
@@ -1134,7 +1134,7 @@ INLINE int translate_address(i386_state *cpustate, int pl, int type, UINT32 *add
 }
 
 
-INLINE int translate_address_with_width(i386_state *cpustate, int pl, int type, UINT32 width, UINT32 *address, UINT32 *error)
+/*INLINE*/ int __FASTCALL translate_address_with_width(i386_state *cpustate, int pl, int type, UINT32 width, UINT32 *address, UINT32 *error)
 {
        if(!(cpustate->cr[0] & I386_CR0_PG)) // Some (very few) old OS's won't work with this
                return TRUE;
@@ -1176,12 +1176,12 @@ INLINE int translate_address_with_width(i386_state *cpustate, int pl, int type,
        return TRUE;
 }
 
-INLINE void CHANGE_PC(i386_state *cpustate, UINT32 pc)
+INLINE void __FASTCALL CHANGE_PC(i386_state *cpustate, UINT32 pc)
 {
        cpustate->pc = i386_translate(cpustate, CS, pc, -1, 1 );
 }
 
-INLINE void NEAR_BRANCH(i386_state *cpustate, INT32 offs)
+INLINE void __FASTCALL NEAR_BRANCH(i386_state *cpustate, INT32 offs)
 {
        /* TODO: limit */
 //     if(i386_limit_check(cpustate, CS, cpustate->eip + offs, 1) != 0) {
@@ -1195,7 +1195,7 @@ INLINE void NEAR_BRANCH(i386_state *cpustate, INT32 offs)
 //     cpustate->pc = i386_translate(cpustate, CS, cpustate->pc + offs, -1, 1 );
 }
 
-INLINE UINT8 FETCH(i386_state *cpustate)
+INLINE UINT8 __FASTCALL FETCH(i386_state *cpustate)
 {
        UINT8 value;
        UINT32 address = cpustate->pc, error;
@@ -1212,7 +1212,7 @@ INLINE UINT8 FETCH(i386_state *cpustate)
        cpustate->pc++;
        return value;
 }
-INLINE UINT16 FETCH16(i386_state *cpustate)
+INLINE UINT16 __FASTCALL FETCH16(i386_state *cpustate)
 {
        UINT16 value;
        UINT32 address = cpustate->pc, error;
@@ -1238,7 +1238,7 @@ INLINE UINT16 FETCH16(i386_state *cpustate)
        }
        return value;
 }
-INLINE UINT32 FETCH32(i386_state *cpustate)
+INLINE UINT32 __FASTCALL FETCH32(i386_state *cpustate)
 {
        UINT32 value;
        UINT32 address = cpustate->pc, error;
@@ -1276,7 +1276,7 @@ INLINE UINT32 FETCH32(i386_state *cpustate)
        return value;
 }
 
-INLINE UINT8 READ8(i386_state *cpustate,UINT32 ea)
+INLINE UINT8 __FASTCALL READ8(i386_state *cpustate,UINT32 ea)
 {
        UINT32 address = ea, error;
 
@@ -1286,7 +1286,7 @@ INLINE UINT8 READ8(i386_state *cpustate,UINT32 ea)
        address &= cpustate->a20_mask;
        return read_data8_with_wait(cpustate, address);
 }
-INLINE UINT16 READ16(i386_state *cpustate,UINT32 ea)
+INLINE UINT16 __FASTCALL READ16(i386_state *cpustate,UINT32 ea)
 {
        UINT16 value;
        UINT32 address = ea, error;
@@ -1310,7 +1310,7 @@ INLINE UINT16 READ16(i386_state *cpustate,UINT32 ea)
        }
        return value;
 }
-INLINE UINT32 READ32(i386_state *cpustate,UINT32 ea)
+INLINE UINT32 __FASTCALL READ32(i386_state *cpustate,UINT32 ea)
 {
        UINT32 value;
        UINT32 address = ea, error;
@@ -1343,7 +1343,7 @@ INLINE UINT32 READ32(i386_state *cpustate,UINT32 ea)
        return value;
 }
 
-INLINE UINT64 READ64(i386_state *cpustate,UINT32 ea)
+INLINE UINT64 __FASTCALL READ64(i386_state *cpustate,UINT32 ea)
 {
        UINT64 value;
        UINT32 address = ea, error;
@@ -1392,7 +1392,8 @@ INLINE UINT64 READ64(i386_state *cpustate,UINT32 ea)
        }
        return value;
 }
-INLINE UINT8 READ8PL0(i386_state *cpustate,UINT32 ea)
+
+INLINE UINT8 __FASTCALL READ8PL0(i386_state *cpustate,UINT32 ea)
 {
        UINT32 address = ea, error;
 
@@ -1402,7 +1403,7 @@ INLINE UINT8 READ8PL0(i386_state *cpustate,UINT32 ea)
        address &= cpustate->a20_mask;
        return read_data8_with_wait(cpustate, address);
 }
-INLINE UINT16 READ16PL0(i386_state *cpustate,UINT32 ea)
+INLINE UINT16 __FASTCALL READ16PL0(i386_state *cpustate,UINT32 ea)
 {
        UINT16 value;
        UINT32 address = ea, error;
@@ -1426,7 +1427,7 @@ INLINE UINT16 READ16PL0(i386_state *cpustate,UINT32 ea)
        return value;
 }
 
-INLINE UINT32 READ32PL0(i386_state *cpustate,UINT32 ea)
+INLINE UINT32 __FASTCALL READ32PL0(i386_state *cpustate,UINT32 ea)
 {
        UINT32 value;
        UINT32 address = ea, error;
@@ -1459,14 +1460,14 @@ INLINE UINT32 READ32PL0(i386_state *cpustate,UINT32 ea)
        return value;
 }
 
-INLINE void WRITE_TEST(i386_state *cpustate,UINT32 ea)
+INLINE void __FASTCALL WRITE_TEST(i386_state *cpustate,UINT32 ea)
 {
        UINT32 address = ea, error;
        if(!translate_address(cpustate,cpustate->CPL,TRANSLATE_WRITE,&address,&error))
                PF_THROW(error);
 }
 
-INLINE void WRITE8(i386_state *cpustate,UINT32 ea, UINT8 value)
+INLINE void __FASTCALL WRITE8(i386_state *cpustate,UINT32 ea, UINT8 value)
 {
        UINT32 address = ea, error;
 
@@ -1476,7 +1477,7 @@ INLINE void WRITE8(i386_state *cpustate,UINT32 ea, UINT8 value)
        address &= cpustate->a20_mask;
        write_data8_with_wait(cpustate, address, value);
 }
-INLINE void WRITE16(i386_state *cpustate,UINT32 ea, UINT16 value)
+INLINE void __FASTCALL WRITE16(i386_state *cpustate,UINT32 ea, UINT16 value)
 {
        UINT32 address = ea, error;
 
@@ -1497,7 +1498,7 @@ INLINE void WRITE16(i386_state *cpustate,UINT32 ea, UINT16 value)
                write_data16_with_wait(cpustate, address, value);
        }
 }
-INLINE void WRITE32(i386_state *cpustate,UINT32 ea, UINT32 value)
+INLINE void __FASTCALL WRITE32(i386_state *cpustate,UINT32 ea, UINT32 value)
 {
        UINT32 address = ea, error;
 
@@ -1528,7 +1529,7 @@ INLINE void WRITE32(i386_state *cpustate,UINT32 ea, UINT32 value)
        }
 }
 
-INLINE void WRITE64(i386_state *cpustate,UINT32 ea, UINT64 value)
+INLINE void __FASTCALL WRITE64(i386_state *cpustate,UINT32 ea, UINT64 value)
 {
        UINT32 address = ea, error;
 
@@ -1575,21 +1576,21 @@ INLINE void WRITE64(i386_state *cpustate,UINT32 ea, UINT64 value)
 
 /***********************************************************************************/
 
-INLINE UINT8 OR8(i386_state *cpustate,UINT8 dst, UINT8 src)
+INLINE UINT8 __FASTCALL OR8(i386_state *cpustate,UINT8 dst, UINT8 src)
 {
        UINT8 res = dst | src;
        cpustate->CF = cpustate->OF = 0;
        SetSZPF8(res);
        return res;
 }
-INLINE UINT16 OR16(i386_state *cpustate,UINT16 dst, UINT16 src)
+INLINE UINT16 __FASTCALL OR16(i386_state *cpustate,UINT16 dst, UINT16 src)
 {
        UINT16 res = dst | src;
        cpustate->CF = cpustate->OF = 0;
        SetSZPF16(res);
        return res;
 }
-INLINE UINT32 OR32(i386_state *cpustate,UINT32 dst, UINT32 src)
+INLINE UINT32 __FASTCALL OR32(i386_state *cpustate,UINT32 dst, UINT32 src)
 {
        UINT32 res = dst | src;
        cpustate->CF = cpustate->OF = 0;
@@ -1597,21 +1598,21 @@ INLINE UINT32 OR32(i386_state *cpustate,UINT32 dst, UINT32 src)
        return res;
 }
 
-INLINE UINT8 AND8(i386_state *cpustate,UINT8 dst, UINT8 src)
+INLINE UINT8 __FASTCALL AND8(i386_state *cpustate,UINT8 dst, UINT8 src)
 {
        UINT8 res = dst & src;
        cpustate->CF = cpustate->OF = 0;
        SetSZPF8(res);
        return res;
 }
-INLINE UINT16 AND16(i386_state *cpustate,UINT16 dst, UINT16 src)
+INLINE UINT16 __FASTCALL AND16(i386_state *cpustate,UINT16 dst, UINT16 src)
 {
        UINT16 res = dst & src;
        cpustate->CF = cpustate->OF = 0;
        SetSZPF16(res);
        return res;
 }
-INLINE UINT32 AND32(i386_state *cpustate,UINT32 dst, UINT32 src)
+INLINE UINT32 __FASTCALL AND32(i386_state *cpustate,UINT32 dst, UINT32 src)
 {
        UINT32 res = dst & src;
        cpustate->CF = cpustate->OF = 0;
@@ -1619,21 +1620,21 @@ INLINE UINT32 AND32(i386_state *cpustate,UINT32 dst, UINT32 src)
        return res;
 }
 
-INLINE UINT8 XOR8(i386_state *cpustate,UINT8 dst, UINT8 src)
+INLINE UINT8 __FASTCALL XOR8(i386_state *cpustate,UINT8 dst, UINT8 src)
 {
        UINT8 res = dst ^ src;
        cpustate->CF = cpustate->OF = 0;
        SetSZPF8(res);
        return res;
 }
-INLINE UINT16 XOR16(i386_state *cpustate,UINT16 dst, UINT16 src)
+INLINE UINT16 __FASTCALL XOR16(i386_state *cpustate,UINT16 dst, UINT16 src)
 {
        UINT16 res = dst ^ src;
        cpustate->CF = cpustate->OF = 0;
        SetSZPF16(res);
        return res;
 }
-INLINE UINT32 XOR32(i386_state *cpustate,UINT32 dst, UINT32 src)
+INLINE UINT32 __FASTCALL XOR32(i386_state *cpustate,UINT32 dst, UINT32 src)
 {
        UINT32 res = dst ^ src;
        cpustate->CF = cpustate->OF = 0;
@@ -1642,7 +1643,7 @@ INLINE UINT32 XOR32(i386_state *cpustate,UINT32 dst, UINT32 src)
 }
 
 #define SUB8(cpu, dst, src) SBB8(cpu, dst, src, 0)
-INLINE UINT8 SBB8(i386_state *cpustate,UINT8 dst, UINT8 src, UINT8 b)
+INLINE UINT8 __FASTCALL SBB8(i386_state *cpustate,UINT8 dst, UINT8 src, UINT8 b)
 {
        UINT16 res = (UINT16)dst - (UINT16)src - (UINT8)b;
        SetCF8(res);
@@ -1653,7 +1654,7 @@ INLINE UINT8 SBB8(i386_state *cpustate,UINT8 dst, UINT8 src, UINT8 b)
 }
 
 #define SUB16(cpu, dst, src) SBB16(cpu, dst, src, 0)
-INLINE UINT16 SBB16(i386_state *cpustate,UINT16 dst, UINT16 src, UINT16 b)
+INLINE UINT16 __FASTCALL SBB16(i386_state *cpustate,UINT16 dst, UINT16 src, UINT16 b)
 {
        UINT32 res = (UINT32)dst - (UINT32)src - (UINT32)b;
        SetCF16(res);
@@ -1664,7 +1665,7 @@ INLINE UINT16 SBB16(i386_state *cpustate,UINT16 dst, UINT16 src, UINT16 b)
 }
 
 #define SUB32(cpu, dst, src) SBB32(cpu, dst, src, 0)
-INLINE UINT32 SBB32(i386_state *cpustate,UINT32 dst, UINT32 src, UINT32 b)
+INLINE UINT32 __FASTCALL SBB32(i386_state *cpustate,UINT32 dst, UINT32 src, UINT32 b)
 {
        UINT64 res = (UINT64)dst - (UINT64)src - (UINT64) b;
        SetCF32(res);
@@ -1675,7 +1676,7 @@ INLINE UINT32 SBB32(i386_state *cpustate,UINT32 dst, UINT32 src, UINT32 b)
 }
 
 #define ADD8(cpu, dst, src) ADC8(cpu, dst, src, 0)
-INLINE UINT8 ADC8(i386_state *cpustate,UINT8 dst, UINT8 src, UINT8 c)
+INLINE UINT8 __FASTCALL ADC8(i386_state *cpustate,UINT8 dst, UINT8 src, UINT8 c)
 {
        UINT16 res = (UINT16)dst + (UINT16)src + (UINT16)c;
        SetCF8(res);
@@ -1686,7 +1687,7 @@ INLINE UINT8 ADC8(i386_state *cpustate,UINT8 dst, UINT8 src, UINT8 c)
 }
 
 #define ADD16(cpu, dst, src) ADC16(cpu, dst, src, 0)
-INLINE UINT16 ADC16(i386_state *cpustate,UINT16 dst, UINT16 src, UINT8 c)
+INLINE UINT16 __FASTCALL ADC16(i386_state *cpustate,UINT16 dst, UINT16 src, UINT8 c)
 {
        UINT32 res = (UINT32)dst + (UINT32)src + (UINT32)c;
        SetCF16(res);
@@ -1697,7 +1698,7 @@ INLINE UINT16 ADC16(i386_state *cpustate,UINT16 dst, UINT16 src, UINT8 c)
 }
 
 #define ADD32(cpu, dst, src) ADC32(cpu, dst, src, 0)
-INLINE UINT32 ADC32(i386_state *cpustate,UINT32 dst, UINT32 src, UINT32 c)
+INLINE UINT32 __FASTCALL ADC32(i386_state *cpustate,UINT32 dst, UINT32 src, UINT32 c)
 {
        UINT64 res = (UINT64)dst + (UINT64)src + (UINT64) c;
        SetCF32(res);
@@ -1707,7 +1708,7 @@ INLINE UINT32 ADC32(i386_state *cpustate,UINT32 dst, UINT32 src, UINT32 c)
        return (UINT32)res;
 }
 
-INLINE UINT8 INC8(i386_state *cpustate,UINT8 dst)
+INLINE UINT8 __FASTCALL INC8(i386_state *cpustate,UINT8 dst)
 {
        UINT16 res = (UINT16)dst + 1;
        SetOF_Add8(res,1,dst);
@@ -1715,7 +1716,7 @@ INLINE UINT8 INC8(i386_state *cpustate,UINT8 dst)
        SetSZPF8(res);
        return (UINT8)res;
 }
-INLINE UINT16 INC16(i386_state *cpustate,UINT16 dst)
+INLINE UINT16 __FASTCALL INC16(i386_state *cpustate,UINT16 dst)
 {
        UINT32 res = (UINT32)dst + 1;
        SetOF_Add16(res,1,dst);
@@ -1723,7 +1724,7 @@ INLINE UINT16 INC16(i386_state *cpustate,UINT16 dst)
        SetSZPF16(res);
        return (UINT16)res;
 }
-INLINE UINT32 INC32(i386_state *cpustate,UINT32 dst)
+INLINE UINT32 __FASTCALL INC32(i386_state *cpustate,UINT32 dst)
 {
        UINT64 res = (UINT64)dst + 1;
        SetOF_Add32(res,1,dst);
@@ -1732,7 +1733,7 @@ INLINE UINT32 INC32(i386_state *cpustate,UINT32 dst)
        return (UINT32)res;
 }
 
-INLINE UINT8 DEC8(i386_state *cpustate,UINT8 dst)
+INLINE UINT8 __FASTCALL DEC8(i386_state *cpustate,UINT8 dst)
 {
        UINT16 res = (UINT16)dst - 1;
        SetOF_Sub8(res,1,dst);
@@ -1740,7 +1741,7 @@ INLINE UINT8 DEC8(i386_state *cpustate,UINT8 dst)
        SetSZPF8(res);
        return (UINT8)res;
 }
-INLINE UINT16 DEC16(i386_state *cpustate,UINT16 dst)
+INLINE UINT16 __FASTCALL DEC16(i386_state *cpustate,UINT16 dst)
 {
        UINT32 res = (UINT32)dst - 1;
        SetOF_Sub16(res,1,dst);
@@ -1748,7 +1749,7 @@ INLINE UINT16 DEC16(i386_state *cpustate,UINT16 dst)
        SetSZPF16(res);
        return (UINT16)res;
 }
-INLINE UINT32 DEC32(i386_state *cpustate,UINT32 dst)
+INLINE UINT32 __FASTCALL DEC32(i386_state *cpustate,UINT32 dst)
 {
        UINT64 res = (UINT64)dst - 1;
        SetOF_Sub32(res,1,dst);
@@ -1759,7 +1760,7 @@ INLINE UINT32 DEC32(i386_state *cpustate,UINT32 dst)
 
 
 
-INLINE void PUSH16(i386_state *cpustate,UINT16 value)
+INLINE void __FASTCALL PUSH16(i386_state *cpustate,UINT16 value)
 {
        UINT32 ea, new_esp;
        if( STACK_32BIT ) {
@@ -1774,7 +1775,7 @@ INLINE void PUSH16(i386_state *cpustate,UINT16 value)
                REG16(SP) = new_esp;
        }
 }
-INLINE void PUSH32(i386_state *cpustate,UINT32 value)
+INLINE void __FASTCALL PUSH32(i386_state *cpustate,UINT32 value)
 {
        UINT32 ea, new_esp;
        if( STACK_32BIT ) {
@@ -1790,7 +1791,7 @@ INLINE void PUSH32(i386_state *cpustate,UINT32 value)
        }
 }
 
-INLINE void PUSH32SEG(i386_state *cpustate,UINT32 value)
+INLINE void __FASTCALL PUSH32SEG(i386_state *cpustate,UINT32 value)
 {
        UINT32 ea, new_esp;
        if( STACK_32BIT ) {
@@ -1816,7 +1817,7 @@ INLINE void PUSH32SEG(i386_state *cpustate,UINT32 value)
        }
 }
 
-INLINE void PUSH8(i386_state *cpustate,UINT8 value)
+INLINE void __FASTCALL PUSH8(i386_state *cpustate,UINT8 value)
 {
        if( cpustate->operand_size ) {
                PUSH32(cpustate,(INT32)(INT8)value);
@@ -1825,7 +1826,7 @@ INLINE void PUSH8(i386_state *cpustate,UINT8 value)
        }
 }
 
-INLINE UINT8 POP8(i386_state *cpustate)
+INLINE UINT8 __FASTCALL POP8(i386_state *cpustate)
 {
        UINT8 value;
        UINT32 ea, new_esp;
@@ -1842,7 +1843,7 @@ INLINE UINT8 POP8(i386_state *cpustate)
        }
        return value;
 }
-INLINE UINT16 POP16(i386_state *cpustate)
+INLINE UINT16 __FASTCALL POP16(i386_state *cpustate)
 {
        UINT16 value;
        UINT32 ea, new_esp;
@@ -1859,7 +1860,7 @@ INLINE UINT16 POP16(i386_state *cpustate)
        }
        return value;
 }
-INLINE UINT32 POP32(i386_state *cpustate)
+INLINE UINT32 __FASTCALL POP32(i386_state *cpustate)
 {
        UINT32 value;
        UINT32 ea, new_esp;
@@ -1877,7 +1878,7 @@ INLINE UINT32 POP32(i386_state *cpustate)
        return value;
 }
 
-INLINE void BUMP_SI(i386_state *cpustate,int adjustment)
+INLINE void __FASTCALL BUMP_SI(i386_state *cpustate,int adjustment)
 {
        if ( cpustate->address_size )
                REG32(ESI) += ((cpustate->DF) ? -adjustment : +adjustment);
@@ -1885,7 +1886,7 @@ INLINE void BUMP_SI(i386_state *cpustate,int adjustment)
                REG16(SI) += ((cpustate->DF) ? -adjustment : +adjustment);
 }
 
-INLINE void BUMP_DI(i386_state *cpustate,int adjustment)
+INLINE void __FASTCALL BUMP_DI(i386_state *cpustate,int adjustment)
 {
        if ( cpustate->address_size )
                REG32(EDI) += ((cpustate->DF) ? -adjustment : +adjustment);
@@ -1899,7 +1900,7 @@ INLINE void BUMP_DI(i386_state *cpustate,int adjustment)
     I/O ACCESS
 ***********************************************************************************/
 
-INLINE void check_ioperm(i386_state *cpustate, offs_t port, UINT8 mask)
+INLINE void __FASTCALL check_ioperm(i386_state *cpustate, offs_t port, UINT8 mask)
 {
        UINT8 IOPL, map;
        UINT16 IOPB;
@@ -1926,19 +1927,19 @@ INLINE void check_ioperm(i386_state *cpustate, offs_t port, UINT8 mask)
                FAULT_THROW(FAULT_GP,0);
 }
 
-INLINE UINT8 READPORT8(i386_state *cpustate, offs_t port)
+INLINE UINT8 __FASTCALL READPORT8(i386_state *cpustate, offs_t port)
 {
        check_ioperm(cpustate, port, 1);
        return cpustate->io->read_io8(port);
 }
 
-INLINE void WRITEPORT8(i386_state *cpustate, offs_t port, UINT8 value)
+INLINE void __FASTCALL WRITEPORT8(i386_state *cpustate, offs_t port, UINT8 value)
 {
        check_ioperm(cpustate, port, 1);
        cpustate->io->write_io8(port, value);
 }
 
-INLINE UINT16 READPORT16(i386_state *cpustate, offs_t port)
+INLINE UINT16 __FASTCALL READPORT16(i386_state *cpustate, offs_t port)
 {
        if (port & 1)
        {
@@ -1953,7 +1954,7 @@ INLINE UINT16 READPORT16(i386_state *cpustate, offs_t port)
        }
 }
 
-INLINE void WRITEPORT16(i386_state *cpustate, offs_t port, UINT16 value)
+INLINE void __FASTCALL WRITEPORT16(i386_state *cpustate, offs_t port, UINT16 value)
 {
        if (port & 1)
        {
@@ -1967,7 +1968,7 @@ INLINE void WRITEPORT16(i386_state *cpustate, offs_t port, UINT16 value)
        }
 }
 
-INLINE UINT32 READPORT32(i386_state *cpustate, offs_t port)
+INLINE UINT32 __FASTCALL READPORT32(i386_state *cpustate, offs_t port)
 {
        if (port & 3)
        {
@@ -1984,7 +1985,7 @@ INLINE UINT32 READPORT32(i386_state *cpustate, offs_t port)
        }
 }
 
-INLINE void WRITEPORT32(i386_state *cpustate, offs_t port, UINT32 value)
+INLINE void __FASTCALL WRITEPORT32(i386_state *cpustate, offs_t port, UINT32 value)
 {
        if (port & 3)
        {
@@ -2005,7 +2006,7 @@ INLINE void WRITEPORT32(i386_state *cpustate, offs_t port, UINT32 value)
 ***********************************************************************************/
 
 // Pentium MSR handling
-UINT64 pentium_msr_read(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
+UINT64 __FASTCALL pentium_msr_read(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
 {
        switch(offset)
        {
@@ -2048,7 +2049,7 @@ UINT64 pentium_msr_read(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
        return -1;
 }
 
-void pentium_msr_write(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *valid_msr)
+void __FASTCALL pentium_msr_write(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *valid_msr)
 {
        switch(offset)
        {
@@ -2094,7 +2095,7 @@ void pentium_msr_write(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *
 }
 
 // P6 (Pentium Pro, Pentium II, Pentium III) MSR handling
-UINT64 p6_msr_read(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
+UINT64 __FASTCALL p6_msr_read(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
 {
        switch(offset)
        {
@@ -2127,7 +2128,7 @@ UINT64 p6_msr_read(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
        return -1;
 }
 
-void p6_msr_write(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *valid_msr)
+void __FASTCALL p6_msr_write(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *valid_msr)
 {
        switch(offset)
        {
@@ -2154,7 +2155,7 @@ void p6_msr_write(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *valid
 }
 
 // PIV (Pentium 4+)
-UINT64 piv_msr_read(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
+UINT64 __FASTCALL piv_msr_read(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
 {
        switch(offset)
        {
@@ -2166,7 +2167,7 @@ UINT64 piv_msr_read(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
        return -1;
 }
 
-void piv_msr_write(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *valid_msr)
+void __FASTCALL piv_msr_write(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *valid_msr)
 {
        switch(offset)
        {
@@ -2177,7 +2178,7 @@ void piv_msr_write(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *vali
        }
 }
 
-INLINE UINT64 MSR_READ(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
+INLINE UINT64 __FASTCALL MSR_READ(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
 {
        UINT64 res;
        UINT8 cpu_type = (cpustate->cpu_version >> 8) & 0x0f;
@@ -2203,7 +2204,7 @@ INLINE UINT64 MSR_READ(i386_state *cpustate, UINT32 offset,UINT8 *valid_msr)
        return res;
 }
 
-INLINE void MSR_WRITE(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *valid_msr)
+INLINE void __FASTCALL MSR_WRITE(i386_state *cpustate, UINT32 offset, UINT64 data, UINT8 *valid_msr)
 {
        *valid_msr = 0;
        UINT8 cpu_type = (cpustate->cpu_version >> 8) & 0x0f;
index 23923e0..83355dc 100644 (file)
@@ -2,7 +2,7 @@
 // copyright-holders:Ville Linde, Barry Rodewald, Carl, Philip Bennett
 // Intel 486+ specific opcodes
 
-static void I486OP(cpuid)(i386_state *cpustate)             // Opcode 0x0F A2
+static void __FASTCALL I486OP(cpuid)(i386_state *cpustate)             // Opcode 0x0F A2
 {
        if (cpustate->cpuid_id0 == 0)
        {
@@ -35,18 +35,18 @@ static void I486OP(cpuid)(i386_state *cpustate)             // Opcode 0x0F A2
        }
 }
 
-static void I486OP(invd)(i386_state *cpustate)              // Opcode 0x0f 08
+static void __FASTCALL I486OP(invd)(i386_state *cpustate)              // Opcode 0x0f 08
 {
        // Nothing to do ?
        CYCLES(cpustate,CYCLES_INVD);
 }
 
-static void I486OP(wbinvd)(i386_state *cpustate)            // Opcode 0x0f 09
+static void __FASTCALL I486OP(wbinvd)(i386_state *cpustate)            // Opcode 0x0f 09
 {
        // Nothing to do ?
 }
 
-static void I486OP(cmpxchg_rm8_r8)(i386_state *cpustate)    // Opcode 0x0f b0
+static void __FASTCALL I486OP(cmpxchg_rm8_r8)(i386_state *cpustate)    // Opcode 0x0f b0
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -80,7 +80,7 @@ static void I486OP(cmpxchg_rm8_r8)(i386_state *cpustate)    // Opcode 0x0f b0
        }
 }
 
-static void I486OP(cmpxchg_rm16_r16)(i386_state *cpustate)  // Opcode 0x0f b1
+static void __FASTCALL I486OP(cmpxchg_rm16_r16)(i386_state *cpustate)  // Opcode 0x0f b1
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -113,7 +113,7 @@ static void I486OP(cmpxchg_rm16_r16)(i386_state *cpustate)  // Opcode 0x0f b1
        }
 }
 
-static void I486OP(cmpxchg_rm32_r32)(i386_state *cpustate)  // Opcode 0x0f b1
+static void __FASTCALL I486OP(cmpxchg_rm32_r32)(i386_state *cpustate)  // Opcode 0x0f b1
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -146,7 +146,7 @@ static void I486OP(cmpxchg_rm32_r32)(i386_state *cpustate)  // Opcode 0x0f b1
        }
 }
 
-static void I486OP(xadd_rm8_r8)(i386_state *cpustate)   // Opcode 0x0f c0
+static void __FASTCALL I486OP(xadd_rm8_r8)(i386_state *cpustate)   // Opcode 0x0f c0
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -167,7 +167,7 @@ static void I486OP(xadd_rm8_r8)(i386_state *cpustate)   // Opcode 0x0f c0
        }
 }
 
-static void I486OP(xadd_rm16_r16)(i386_state *cpustate) // Opcode 0x0f c1
+static void __FASTCALL I486OP(xadd_rm16_r16)(i386_state *cpustate) // Opcode 0x0f c1
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -188,7 +188,7 @@ static void I486OP(xadd_rm16_r16)(i386_state *cpustate) // Opcode 0x0f c1
        }
 }
 
-static void I486OP(xadd_rm32_r32)(i386_state *cpustate) // Opcode 0x0f c1
+static void __FASTCALL I486OP(xadd_rm32_r32)(i386_state *cpustate) // Opcode 0x0f c1
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -209,7 +209,7 @@ static void I486OP(xadd_rm32_r32)(i386_state *cpustate) // Opcode 0x0f c1
        }
 }
 
-static void I486OP(group0F01_16)(i386_state *cpustate)      // Opcode 0x0f 01
+static void __FASTCALL I486OP(group0F01_16)(i386_state *cpustate)      // Opcode 0x0f 01
 {
        UINT8 modrm = FETCH(cpustate);
        UINT16 address;
@@ -327,7 +327,7 @@ static void I486OP(group0F01_16)(i386_state *cpustate)      // Opcode 0x0f 01
        }
 }
 
-static void I486OP(group0F01_32)(i386_state *cpustate)      // Opcode 0x0f 01
+static void __FASTCALL I486OP(group0F01_32)(i386_state *cpustate)      // Opcode 0x0f 01
 {
        UINT8 modrm = FETCH(cpustate);
        UINT32 address, ea;
@@ -445,55 +445,55 @@ static void I486OP(group0F01_32)(i386_state *cpustate)      // Opcode 0x0f 01
        }
 }
 
-static void I486OP(bswap_eax)(i386_state *cpustate)     // Opcode 0x0f 38
+static void __FASTCALL I486OP(bswap_eax)(i386_state *cpustate)     // Opcode 0x0f C8
 {
        REG32(EAX) = SWITCH_ENDIAN_32(REG32(EAX));
        CYCLES(cpustate,1);     // TODO
 }
 
-static void I486OP(bswap_ecx)(i386_state *cpustate)     // Opcode 0x0f 39
+static void __FASTCALL I486OP(bswap_ecx)(i386_state *cpustate)     // Opcode 0x0f C9
 {
        REG32(ECX) = SWITCH_ENDIAN_32(REG32(ECX));
        CYCLES(cpustate,1);     // TODO
 }
 
-static void I486OP(bswap_edx)(i386_state *cpustate)     // Opcode 0x0f 3A
+static void __FASTCALL I486OP(bswap_edx)(i386_state *cpustate)     // Opcode 0x0f CA
 {
        REG32(EDX) = SWITCH_ENDIAN_32(REG32(EDX));
        CYCLES(cpustate,1);     // TODO
 }
 
-static void I486OP(bswap_ebx)(i386_state *cpustate)     // Opcode 0x0f 3B
+static void __FASTCALL I486OP(bswap_ebx)(i386_state *cpustate)     // Opcode 0x0f CB
 {
        REG32(EBX) = SWITCH_ENDIAN_32(REG32(EBX));
        CYCLES(cpustate,1);     // TODO
 }
 
-static void I486OP(bswap_esp)(i386_state *cpustate)     // Opcode 0x0f 3C
+static void __FASTCALL I486OP(bswap_esp)(i386_state *cpustate)     // Opcode 0x0f CC
 {
        REG32(ESP) = SWITCH_ENDIAN_32(REG32(ESP));
        CYCLES(cpustate,1);     // TODO
 }
 
-static void I486OP(bswap_ebp)(i386_state *cpustate)     // Opcode 0x0f 3D
+static void __FASTCALL I486OP(bswap_ebp)(i386_state *cpustate)     // Opcode 0x0f CD
 {
        REG32(EBP) = SWITCH_ENDIAN_32(REG32(EBP));
        CYCLES(cpustate,1);     // TODO
 }
 
-static void I486OP(bswap_esi)(i386_state *cpustate)     // Opcode 0x0f 3E
+static void __FASTCALL I486OP(bswap_esi)(i386_state *cpustate)     // Opcode 0x0f CE
 {
        REG32(ESI) = SWITCH_ENDIAN_32(REG32(ESI));
        CYCLES(cpustate,1);     // TODO
 }
 
-static void I486OP(bswap_edi)(i386_state *cpustate)     // Opcode 0x0f 3F
+static void __FASTCALL I486OP(bswap_edi)(i386_state *cpustate)     // Opcode 0x0f CF
 {
        REG32(EDI) = SWITCH_ENDIAN_32(REG32(EDI));
        CYCLES(cpustate,1);     // TODO
 }
 
-static void I486OP(mov_cr_r32)(i386_state *cpustate)        // Opcode 0x0f 22
+static void __FASTCALL I486OP(mov_cr_r32)(i386_state *cpustate)        // Opcode 0x0f 22
 {
        if(PROTECTED_MODE && cpustate->CPL)
                FAULT(FAULT_GP, 0);
index ea38632..a6ad25f 100644 (file)
@@ -5,55 +5,55 @@
 extern flag float32_is_nan( float32 a ); // since its not defined in softfloat.h
 extern flag float64_is_nan( float64 a ); // since its not defined in softfloat.h
 
-INLINE void MMXPROLOG(i386_state *cpustate)
+INLINE void __FASTCALL MMXPROLOG(i386_state *cpustate)
 {
        //cpustate->x87_sw &= ~(X87_SW_TOP_MASK << X87_SW_TOP_SHIFT); // top = 0
        cpustate->x87_tw = 0; // tag word = 0
 }
 
-INLINE void READMMX(i386_state *cpustate,UINT32 ea,MMX_REG &r)
+INLINE void __FASTCALL READMMX(i386_state *cpustate,UINT32 ea,MMX_REG &r)
 {
        r.q=READ64(cpustate, ea);
 }
 
-INLINE void WRITEMMX(i386_state *cpustate,UINT32 ea,MMX_REG &r)
+INLINE void __FASTCALL WRITEMMX(i386_state *cpustate,UINT32 ea,MMX_REG &r)
 {
        WRITE64(cpustate, ea, r.q);
 }
 
-INLINE void READXMM(i386_state *cpustate,UINT32 ea,XMM_REG &r)
+INLINE void __FASTCALL READXMM(i386_state *cpustate,UINT32 ea,XMM_REG &r)
 {
        r.q[0]=READ64(cpustate, ea);
        r.q[1]=READ64(cpustate, ea+8);
 }
 
-INLINE void WRITEXMM(i386_state *cpustate,UINT32 ea,XMM_REG &r)
+INLINE void __FASTCALL WRITEXMM(i386_state *cpustate,UINT32 ea,XMM_REG &r)
 {
        WRITE64(cpustate, ea, r.q[0]);
        WRITE64(cpustate, ea+8, r.q[1]);
 }
 
-INLINE void READXMM_LO64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
+INLINE void __FASTCALL READXMM_LO64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
 {
        r.q[0]=READ64(cpustate, ea);
 }
 
-INLINE void WRITEXMM_LO64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
+INLINE void __FASTCALL WRITEXMM_LO64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
 {
        WRITE64(cpustate, ea, r.q[0]);
 }
 
-INLINE void READXMM_HI64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
+INLINE void __FASTCALL READXMM_HI64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
 {
        r.q[1]=READ64(cpustate, ea);
 }
 
-INLINE void WRITEXMM_HI64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
+INLINE void __FASTCALL WRITEXMM_HI64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
 {
        WRITE64(cpustate, ea, r.q[1]);
 }
 
-static void PENTIUMOP(rdmsr)(i386_state* cpustate)          // Opcode 0x0f 32
+static void __FASTCALL PENTIUMOP(rdmsr)(i386_state* cpustate)          // Opcode 0x0f 32
 {
        UINT64 data;
        UINT8 valid_msr = 0;
@@ -71,7 +71,7 @@ static void PENTIUMOP(rdmsr)(i386_state* cpustate)          // Opcode 0x0f 32
        CYCLES(cpustate,CYCLES_RDMSR);
 }
 
-static void PENTIUMOP(wrmsr)(i386_state* cpustate)          // Opcode 0x0f 30
+static void __FASTCALL PENTIUMOP(wrmsr)(i386_state* cpustate)          // Opcode 0x0f 30
 {
        UINT64 data;
        UINT8 valid_msr = 0;
@@ -87,7 +87,7 @@ static void PENTIUMOP(wrmsr)(i386_state* cpustate)          // Opcode 0x0f 30
        CYCLES(cpustate,1);     // TODO: correct cycle count (~30-45)
 }
 
-static void PENTIUMOP(rdtsc)(i386_state* cpustate)          // Opcode 0x0f 31
+static void __FASTCALL PENTIUMOP(rdtsc)(i386_state* cpustate)          // Opcode 0x0f 31
 {
        UINT64 ts = cpustate->tsc + (cpustate->base_cycles - cpustate->cycles);
        REG32(EAX) = (UINT32)(ts);
@@ -96,12 +96,12 @@ static void PENTIUMOP(rdtsc)(i386_state* cpustate)          // Opcode 0x0f 31
        CYCLES(cpustate,CYCLES_RDTSC);
 }
 
-static void PENTIUMOP(ud2)(i386_state* cpustate)    // Opcode 0x0f 0b
+static void __FASTCALL PENTIUMOP(ud2)(i386_state* cpustate)    // Opcode 0x0f 0b
 {
        i386_trap(cpustate, 6, 0, 0);
 }
 
-static void PENTIUMOP(rsm)(i386_state* cpustate)
+static void __FASTCALL PENTIUMOP(rsm)(i386_state* cpustate)
 {
        UINT32 smram_state = cpustate->smbase + 0xfe00;
        if(!cpustate->smm)
@@ -198,14 +198,14 @@ static void PENTIUMOP(rsm)(i386_state* cpustate)
        }
 }
 
-static void PENTIUMOP(prefetch_m8)(i386_state* cpustate)    // Opcode 0x0f 18
+static void __FASTCALL PENTIUMOP(prefetch_m8)(i386_state* cpustate)    // Opcode 0x0f 18
 {
        UINT8 modrm = FETCH(cpustate);
        UINT32 ea = GetEA(cpustate,modrm,0,1);
        CYCLES(cpustate,1+(ea & 1)); // TODO: correct cycle count
 }
 
-static void PENTIUMOP(cmovo_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 40
+static void __FASTCALL PENTIUMOP(cmovo_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 40
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -231,7 +231,7 @@ static void PENTIUMOP(cmovo_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 40
        }
 }
 
-static void PENTIUMOP(cmovo_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 40
+static void __FASTCALL PENTIUMOP(cmovo_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 40
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -257,7 +257,7 @@ static void PENTIUMOP(cmovo_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 40
        }
 }
 
-static void PENTIUMOP(cmovno_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 41
+static void __FASTCALL PENTIUMOP(cmovno_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 41
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -283,7 +283,7 @@ static void PENTIUMOP(cmovno_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovno_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 41
+static void __FASTCALL PENTIUMOP(cmovno_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 41
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -309,7 +309,7 @@ static void PENTIUMOP(cmovno_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovb_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 42
+static void __FASTCALL PENTIUMOP(cmovb_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 42
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -335,7 +335,7 @@ static void PENTIUMOP(cmovb_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 42
        }
 }
 
-static void PENTIUMOP(cmovb_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 42
+static void __FASTCALL PENTIUMOP(cmovb_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 42
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -361,7 +361,7 @@ static void PENTIUMOP(cmovb_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 42
        }
 }
 
-static void PENTIUMOP(cmovae_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 43
+static void __FASTCALL PENTIUMOP(cmovae_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 43
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -387,7 +387,7 @@ static void PENTIUMOP(cmovae_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovae_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 43
+static void __FASTCALL PENTIUMOP(cmovae_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 43
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -413,7 +413,7 @@ static void PENTIUMOP(cmovae_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmove_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 44
+static void __FASTCALL PENTIUMOP(cmove_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 44
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -439,7 +439,7 @@ static void PENTIUMOP(cmove_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 44
        }
 }
 
-static void PENTIUMOP(cmove_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 44
+static void __FASTCALL PENTIUMOP(cmove_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 44
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -465,7 +465,7 @@ static void PENTIUMOP(cmove_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 44
        }
 }
 
-static void PENTIUMOP(cmovne_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 45
+static void __FASTCALL PENTIUMOP(cmovne_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 45
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -491,7 +491,7 @@ static void PENTIUMOP(cmovne_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovne_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 45
+static void __FASTCALL PENTIUMOP(cmovne_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 45
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -517,7 +517,7 @@ static void PENTIUMOP(cmovne_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovbe_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 46
+static void __FASTCALL PENTIUMOP(cmovbe_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 46
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -543,7 +543,7 @@ static void PENTIUMOP(cmovbe_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovbe_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 46
+static void __FASTCALL PENTIUMOP(cmovbe_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 46
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -569,7 +569,7 @@ static void PENTIUMOP(cmovbe_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmova_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 47
+static void __FASTCALL PENTIUMOP(cmova_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 47
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -595,7 +595,7 @@ static void PENTIUMOP(cmova_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 47
        }
 }
 
-static void PENTIUMOP(cmova_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 47
+static void __FASTCALL PENTIUMOP(cmova_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 47
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -621,7 +621,7 @@ static void PENTIUMOP(cmova_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 47
        }
 }
 
-static void PENTIUMOP(cmovs_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 48
+static void __FASTCALL PENTIUMOP(cmovs_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 48
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -647,7 +647,7 @@ static void PENTIUMOP(cmovs_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 48
        }
 }
 
-static void PENTIUMOP(cmovs_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 48
+static void __FASTCALL PENTIUMOP(cmovs_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 48
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -673,7 +673,7 @@ static void PENTIUMOP(cmovs_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 48
        }
 }
 
-static void PENTIUMOP(cmovns_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 49
+static void __FASTCALL PENTIUMOP(cmovns_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 49
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -699,7 +699,7 @@ static void PENTIUMOP(cmovns_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovns_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 49
+static void __FASTCALL PENTIUMOP(cmovns_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 49
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -725,7 +725,7 @@ static void PENTIUMOP(cmovns_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovp_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4a
+static void __FASTCALL PENTIUMOP(cmovp_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4a
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -751,7 +751,7 @@ static void PENTIUMOP(cmovp_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4a
        }
 }
 
-static void PENTIUMOP(cmovp_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4a
+static void __FASTCALL PENTIUMOP(cmovp_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4a
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -777,7 +777,7 @@ static void PENTIUMOP(cmovp_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4a
        }
 }
 
-static void PENTIUMOP(cmovnp_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4b
+static void __FASTCALL PENTIUMOP(cmovnp_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4b
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -803,7 +803,7 @@ static void PENTIUMOP(cmovnp_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovnp_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4b
+static void __FASTCALL PENTIUMOP(cmovnp_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4b
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -829,7 +829,7 @@ static void PENTIUMOP(cmovnp_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovl_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4c
+static void __FASTCALL PENTIUMOP(cmovl_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4c
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -855,7 +855,7 @@ static void PENTIUMOP(cmovl_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4c
        }
 }
 
-static void PENTIUMOP(cmovl_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4c
+static void __FASTCALL PENTIUMOP(cmovl_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4c
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -881,7 +881,7 @@ static void PENTIUMOP(cmovl_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4c
        }
 }
 
-static void PENTIUMOP(cmovge_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4d
+static void __FASTCALL PENTIUMOP(cmovge_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4d
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -907,7 +907,7 @@ static void PENTIUMOP(cmovge_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovge_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4d
+static void __FASTCALL PENTIUMOP(cmovge_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4d
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -933,7 +933,7 @@ static void PENTIUMOP(cmovge_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovle_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4e
+static void __FASTCALL PENTIUMOP(cmovle_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4e
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -959,7 +959,7 @@ static void PENTIUMOP(cmovle_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovle_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4e
+static void __FASTCALL PENTIUMOP(cmovle_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4e
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -985,7 +985,7 @@ static void PENTIUMOP(cmovle_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4
        }
 }
 
-static void PENTIUMOP(cmovg_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4f
+static void __FASTCALL PENTIUMOP(cmovg_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4f
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -1011,7 +1011,7 @@ static void PENTIUMOP(cmovg_r16_rm16)(i386_state* cpustate)    // Opcode 0x0f 4f
        }
 }
 
-static void PENTIUMOP(cmovg_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4f
+static void __FASTCALL PENTIUMOP(cmovg_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4f
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -1037,7 +1037,7 @@ static void PENTIUMOP(cmovg_r32_rm32)(i386_state* cpustate)    // Opcode 0x0f 4f
        }
 }
 
-static void PENTIUMOP(movnti_m16_r16)(i386_state* cpustate) // Opcode 0f c3
+static void __FASTCALL PENTIUMOP(movnti_m16_r16)(i386_state* cpustate) // Opcode 0f c3
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -1051,7 +1051,7 @@ static void PENTIUMOP(movnti_m16_r16)(i386_state* cpustate) // Opcode 0f c3
        }
 }
 
-static void PENTIUMOP(movnti_m32_r32)(i386_state* cpustate) // Opcode 0f c3
+static void __FASTCALL PENTIUMOP(movnti_m32_r32)(i386_state* cpustate) // Opcode 0f c3
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -1084,7 +1084,7 @@ static void I386OP(cyrix_unknown)(i386_state* cpustate)     // Opcode 0x0f 74
        CYCLES(cpustate,1);
 }
 
-static void PENTIUMOP(cmpxchg8b_m64)(i386_state* cpustate)  // Opcode 0x0f c7
+static void __FASTCALL PENTIUMOP(cmpxchg8b_m64)(i386_state* cpustate)  // Opcode 0x0f c7
 {
        UINT8 modm = FETCH(cpustate);
        if( modm >= 0xc0 ) {
@@ -1108,7 +1108,7 @@ static void PENTIUMOP(cmpxchg8b_m64)(i386_state* cpustate)  // Opcode 0x0f c7
        }
 }
 
-static void PENTIUMOP(movntq_m64_r64)(i386_state* cpustate) // Opcode 0f e7
+static void __FASTCALL PENTIUMOP(movntq_m64_r64)(i386_state* cpustate) // Opcode 0f e7
 {
        //MMXPROLOG(cpustate); // TODO: check if needed
        UINT8 modrm = FETCH(cpustate);
@@ -1122,7 +1122,7 @@ static void PENTIUMOP(movntq_m64_r64)(i386_state* cpustate) // Opcode 0f e7
        }
 }
 
-static void PENTIUMOP(maskmovq_r64_r64)(i386_state* cpustate)  // Opcode 0f f7
+static void __FASTCALL PENTIUMOP(maskmovq_r64_r64)(i386_state* cpustate)  // Opcode 0f f7
 {
        int s,m,n;
        UINT8 modm = FETCH(cpustate);
@@ -1147,7 +1147,7 @@ static void SSEOP(maskmovdqu_r128_r128)(i386_state* cpustate)  // Opcode 66 0f f
                        WRITE8(cpustate,ea+n, XMM(s).b[n]);
 }
 
-static void PENTIUMOP(popcnt_r16_rm16)(i386_state* cpustate)    // Opcode f3 0f b8
+static void __FASTCALL PENTIUMOP(popcnt_r16_rm16)(i386_state* cpustate)    // Opcode f3 0f b8
 {
        UINT16 src;
        UINT8 modrm = FETCH(cpustate);
@@ -1168,7 +1168,7 @@ static void PENTIUMOP(popcnt_r16_rm16)(i386_state* cpustate)    // Opcode f3 0f
        CYCLES(cpustate,1); // TODO: correct cycle count
 }
 
-static void PENTIUMOP(popcnt_r32_rm32)(i386_state* cpustate)    // Opcode f3 0f b8
+static void __FASTCALL PENTIUMOP(popcnt_r32_rm32)(i386_state* cpustate)    // Opcode f3 0f b8
 {
        UINT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -1189,21 +1189,21 @@ static void PENTIUMOP(popcnt_r32_rm32)(i386_state* cpustate)    // Opcode f3 0f
        CYCLES(cpustate,1); // TODO: correct cycle count
 }
 
-static void PENTIUMOP(tzcnt_r16_rm16)(i386_state* cpustate)
+static void __FASTCALL PENTIUMOP(tzcnt_r16_rm16)(i386_state* cpustate)
 {
        // for CPUs that don't support TZCNT, fall back to BSF
        i386_bsf_r16_rm16(cpustate);
        // TODO: actually implement TZCNT
 }
 
-static void PENTIUMOP(tzcnt_r32_rm32)(i386_state* cpustate)
+static void __FASTCALL PENTIUMOP(tzcnt_r32_rm32)(i386_state* cpustate)
 {
        // for CPUs that don't support TZCNT, fall back to BSF
        i386_bsf_r32_rm32(cpustate);
        // TODO: actually implement TZCNT
 }
 
-INLINE INT8 SaturatedSignedWordToSignedByte(INT16 word)
+INLINE INT8 __FASTCALL SaturatedSignedWordToSignedByte(INT16 word)
 {
        if (word > 127)
                return 127;
@@ -1212,7 +1212,7 @@ INLINE INT8 SaturatedSignedWordToSignedByte(INT16 word)
        return (INT8)word;
 }
 
-INLINE UINT8 SaturatedSignedWordToUnsignedByte(INT16 word)
+INLINE UINT8 __FASTCALL SaturatedSignedWordToUnsignedByte(INT16 word)
 {
        if (word > 255)
                return 255;
@@ -1221,7 +1221,7 @@ INLINE UINT8 SaturatedSignedWordToUnsignedByte(INT16 word)
        return (UINT8)word;
 }
 
-INLINE INT16 SaturatedSignedDwordToSignedWord(INT32 dword)
+INLINE INT16 __FASTCALL SaturatedSignedDwordToSignedWord(INT32 dword)
 {
        if (dword > 32767)
                return 32767;
@@ -1230,7 +1230,7 @@ INLINE INT16 SaturatedSignedDwordToSignedWord(INT32 dword)
        return (INT16)dword;
 }
 
-INLINE UINT16 SaturatedSignedDwordToUnsignedWord(INT32 dword)
+INLINE UINT16 __FASTCALL SaturatedSignedDwordToUnsignedWord(INT32 dword)
 {
        if (dword > 65535)
                return 65535;
@@ -1239,7 +1239,7 @@ INLINE UINT16 SaturatedSignedDwordToUnsignedWord(INT32 dword)
        return (UINT16)dword;
 }
 
-static void MMXOP(group_0f71)(i386_state* cpustate)  // Opcode 0f 71
+static void __FASTCALL MMXOP(group_0f71)(i386_state* cpustate)  // Opcode 0f 71
 {
        UINT8 modm = FETCH(cpustate);
        UINT8 imm8 = FETCH(cpustate);
@@ -1296,7 +1296,7 @@ static void SSEOP(group_660f71)(i386_state* cpustate)  // Opcode 66 0f 71
        }
 }
 
-static void MMXOP(group_0f72)(i386_state* cpustate)  // Opcode 0f 72
+static void __FASTCALL MMXOP(group_0f72)(i386_state* cpustate)  // Opcode 0f 72
 {
        UINT8 modm = FETCH(cpustate);
        UINT8 imm8 = FETCH(cpustate);
@@ -1347,7 +1347,7 @@ static void SSEOP(group_660f72)(i386_state* cpustate)  // Opcode 66 0f 72
        }
 }
 
-static void MMXOP(group_0f73)(i386_state* cpustate)  // Opcode 0f 73
+static void __FASTCALL MMXOP(group_0f73)(i386_state* cpustate)  // Opcode 0f 73
 {
        UINT8 modm = FETCH(cpustate);
        UINT8 imm8 = FETCH(cpustate);
@@ -1428,7 +1428,7 @@ static void SSEOP(group_660f73)(i386_state* cpustate)  // Opcode 66 0f 73
        }
 }
 
-static void MMXOP(psrlw_r64_rm64)(i386_state* cpustate)  // Opcode 0f d1
+static void __FASTCALL MMXOP(psrlw_r64_rm64)(i386_state* cpustate)  // Opcode 0f d1
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1451,7 +1451,7 @@ static void MMXOP(psrlw_r64_rm64)(i386_state* cpustate)  // Opcode 0f d1
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psrld_r64_rm64)(i386_state* cpustate)  // Opcode 0f d2
+static void __FASTCALL MMXOP(psrld_r64_rm64)(i386_state* cpustate)  // Opcode 0f d2
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1470,7 +1470,7 @@ static void MMXOP(psrld_r64_rm64)(i386_state* cpustate)  // Opcode 0f d2
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psrlq_r64_rm64)(i386_state* cpustate)  // Opcode 0f d3
+static void __FASTCALL MMXOP(psrlq_r64_rm64)(i386_state* cpustate)  // Opcode 0f d3
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1487,7 +1487,7 @@ static void MMXOP(psrlq_r64_rm64)(i386_state* cpustate)  // Opcode 0f d3
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(paddq_r64_rm64)(i386_state* cpustate)  // Opcode 0f d4
+static void __FASTCALL MMXOP(paddq_r64_rm64)(i386_state* cpustate)  // Opcode 0f d4
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1502,7 +1502,7 @@ static void MMXOP(paddq_r64_rm64)(i386_state* cpustate)  // Opcode 0f d4
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pmullw_r64_rm64)(i386_state* cpustate)  // Opcode 0f d5
+static void __FASTCALL MMXOP(pmullw_r64_rm64)(i386_state* cpustate)  // Opcode 0f d5
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1523,7 +1523,7 @@ static void MMXOP(pmullw_r64_rm64)(i386_state* cpustate)  // Opcode 0f d5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psubusb_r64_rm64)(i386_state* cpustate)  // Opcode 0f d8
+static void __FASTCALL MMXOP(psubusb_r64_rm64)(i386_state* cpustate)  // Opcode 0f d8
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1541,7 +1541,7 @@ static void MMXOP(psubusb_r64_rm64)(i386_state* cpustate)  // Opcode 0f d8
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psubusw_r64_rm64)(i386_state* cpustate)  // Opcode 0f d9
+static void __FASTCALL MMXOP(psubusw_r64_rm64)(i386_state* cpustate)  // Opcode 0f d9
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1559,7 +1559,7 @@ static void MMXOP(psubusw_r64_rm64)(i386_state* cpustate)  // Opcode 0f d9
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pand_r64_rm64)(i386_state* cpustate)  // Opcode 0f db
+static void __FASTCALL MMXOP(pand_r64_rm64)(i386_state* cpustate)  // Opcode 0f db
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1574,7 +1574,7 @@ static void MMXOP(pand_r64_rm64)(i386_state* cpustate)  // Opcode 0f db
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(paddusb_r64_rm64)(i386_state* cpustate)  // Opcode 0f dc
+static void __FASTCALL MMXOP(paddusb_r64_rm64)(i386_state* cpustate)  // Opcode 0f dc
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1592,7 +1592,7 @@ static void MMXOP(paddusb_r64_rm64)(i386_state* cpustate)  // Opcode 0f dc
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(paddusw_r64_rm64)(i386_state* cpustate)  // Opcode 0f dd
+static void __FASTCALL MMXOP(paddusw_r64_rm64)(i386_state* cpustate)  // Opcode 0f dd
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1610,7 +1610,7 @@ static void MMXOP(paddusw_r64_rm64)(i386_state* cpustate)  // Opcode 0f dd
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pandn_r64_rm64)(i386_state* cpustate)  // Opcode 0f df
+static void __FASTCALL MMXOP(pandn_r64_rm64)(i386_state* cpustate)  // Opcode 0f df
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1625,7 +1625,7 @@ static void MMXOP(pandn_r64_rm64)(i386_state* cpustate)  // Opcode 0f df
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psraw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e1
+static void __FASTCALL MMXOP(psraw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e1
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1648,7 +1648,7 @@ static void MMXOP(psraw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e1
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psrad_r64_rm64)(i386_state* cpustate)  // Opcode 0f e2
+static void __FASTCALL MMXOP(psrad_r64_rm64)(i386_state* cpustate)  // Opcode 0f e2
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1667,7 +1667,7 @@ static void MMXOP(psrad_r64_rm64)(i386_state* cpustate)  // Opcode 0f e2
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pmulhw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e5
+static void __FASTCALL MMXOP(pmulhw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e5
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1688,7 +1688,7 @@ static void MMXOP(pmulhw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psubsb_r64_rm64)(i386_state* cpustate)  // Opcode 0f e8
+static void __FASTCALL MMXOP(psubsb_r64_rm64)(i386_state* cpustate)  // Opcode 0f e8
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1706,7 +1706,7 @@ static void MMXOP(psubsb_r64_rm64)(i386_state* cpustate)  // Opcode 0f e8
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psubsw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e9
+static void __FASTCALL MMXOP(psubsw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e9
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1724,7 +1724,7 @@ static void MMXOP(psubsw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e9
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(por_r64_rm64)(i386_state* cpustate)  // Opcode 0f eb
+static void __FASTCALL MMXOP(por_r64_rm64)(i386_state* cpustate)  // Opcode 0f eb
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1739,7 +1739,7 @@ static void MMXOP(por_r64_rm64)(i386_state* cpustate)  // Opcode 0f eb
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(paddsb_r64_rm64)(i386_state* cpustate)  // Opcode 0f ec
+static void __FASTCALL MMXOP(paddsb_r64_rm64)(i386_state* cpustate)  // Opcode 0f ec
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1757,7 +1757,7 @@ static void MMXOP(paddsb_r64_rm64)(i386_state* cpustate)  // Opcode 0f ec
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(paddsw_r64_rm64)(i386_state* cpustate)  // Opcode 0f ed
+static void __FASTCALL MMXOP(paddsw_r64_rm64)(i386_state* cpustate)  // Opcode 0f ed
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1775,7 +1775,7 @@ static void MMXOP(paddsw_r64_rm64)(i386_state* cpustate)  // Opcode 0f ed
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pxor_r64_rm64)(i386_state* cpustate)  // Opcode 0f ef
+static void __FASTCALL MMXOP(pxor_r64_rm64)(i386_state* cpustate)  // Opcode 0f ef
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1790,7 +1790,7 @@ static void MMXOP(pxor_r64_rm64)(i386_state* cpustate)  // Opcode 0f ef
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psllw_r64_rm64)(i386_state* cpustate)  // Opcode 0f f1
+static void __FASTCALL MMXOP(psllw_r64_rm64)(i386_state* cpustate)  // Opcode 0f f1
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1813,7 +1813,7 @@ static void MMXOP(psllw_r64_rm64)(i386_state* cpustate)  // Opcode 0f f1
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pslld_r64_rm64)(i386_state* cpustate)  // Opcode 0f f2
+static void __FASTCALL MMXOP(pslld_r64_rm64)(i386_state* cpustate)  // Opcode 0f f2
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1832,7 +1832,7 @@ static void MMXOP(pslld_r64_rm64)(i386_state* cpustate)  // Opcode 0f f2
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psllq_r64_rm64)(i386_state* cpustate)  // Opcode 0f f3
+static void __FASTCALL MMXOP(psllq_r64_rm64)(i386_state* cpustate)  // Opcode 0f f3
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1849,7 +1849,7 @@ static void MMXOP(psllq_r64_rm64)(i386_state* cpustate)  // Opcode 0f f3
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pmaddwd_r64_rm64)(i386_state* cpustate)  // Opcode 0f f5
+static void __FASTCALL MMXOP(pmaddwd_r64_rm64)(i386_state* cpustate)  // Opcode 0f f5
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -1870,7 +1870,7 @@ static void MMXOP(pmaddwd_r64_rm64)(i386_state* cpustate)  // Opcode 0f f5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psubb_r64_rm64)(i386_state* cpustate)  // Opcode 0f f8
+static void __FASTCALL MMXOP(psubb_r64_rm64)(i386_state* cpustate)  // Opcode 0f f8
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1888,7 +1888,7 @@ static void MMXOP(psubb_r64_rm64)(i386_state* cpustate)  // Opcode 0f f8
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psubw_r64_rm64)(i386_state* cpustate)  // Opcode 0f f9
+static void __FASTCALL MMXOP(psubw_r64_rm64)(i386_state* cpustate)  // Opcode 0f f9
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1906,7 +1906,7 @@ static void MMXOP(psubw_r64_rm64)(i386_state* cpustate)  // Opcode 0f f9
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(psubd_r64_rm64)(i386_state* cpustate)  // Opcode 0f fa
+static void __FASTCALL MMXOP(psubd_r64_rm64)(i386_state* cpustate)  // Opcode 0f fa
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1924,7 +1924,7 @@ static void MMXOP(psubd_r64_rm64)(i386_state* cpustate)  // Opcode 0f fa
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(paddb_r64_rm64)(i386_state* cpustate)  // Opcode 0f fc
+static void __FASTCALL MMXOP(paddb_r64_rm64)(i386_state* cpustate)  // Opcode 0f fc
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1942,7 +1942,7 @@ static void MMXOP(paddb_r64_rm64)(i386_state* cpustate)  // Opcode 0f fc
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(paddw_r64_rm64)(i386_state* cpustate)  // Opcode 0f fd
+static void __FASTCALL MMXOP(paddw_r64_rm64)(i386_state* cpustate)  // Opcode 0f fd
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1960,7 +1960,7 @@ static void MMXOP(paddw_r64_rm64)(i386_state* cpustate)  // Opcode 0f fd
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(paddd_r64_rm64)(i386_state* cpustate)  // Opcode 0f fe
+static void __FASTCALL MMXOP(paddd_r64_rm64)(i386_state* cpustate)  // Opcode 0f fe
 {
        int n;
        MMXPROLOG(cpustate);
@@ -1978,7 +1978,7 @@ static void MMXOP(paddd_r64_rm64)(i386_state* cpustate)  // Opcode 0f fe
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(emms)(i386_state* cpustate) // Opcode 0f 77
+static void __FASTCALL MMXOP(emms)(i386_state* cpustate) // Opcode 0f 77
 {
        cpustate->x87_tw = 0xffff; // tag word = 0xffff
        // TODO
@@ -2241,7 +2241,7 @@ static void I386OP(cyrix_rsts)(i386_state* cpustate) // Opcode 0f 7d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(movd_r64_rm32)(i386_state* cpustate) // Opcode 0f 6e
+static void __FASTCALL MMXOP(movd_r64_rm32)(i386_state* cpustate) // Opcode 0f 6e
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2255,7 +2255,7 @@ static void MMXOP(movd_r64_rm32)(i386_state* cpustate) // Opcode 0f 6e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(movq_r64_rm64)(i386_state* cpustate) // Opcode 0f 6f
+static void __FASTCALL MMXOP(movq_r64_rm64)(i386_state* cpustate) // Opcode 0f 6f
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2268,7 +2268,7 @@ static void MMXOP(movq_r64_rm64)(i386_state* cpustate) // Opcode 0f 6f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(movd_rm32_r64)(i386_state* cpustate) // Opcode 0f 7e
+static void __FASTCALL MMXOP(movd_rm32_r64)(i386_state* cpustate) // Opcode 0f 7e
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2281,7 +2281,7 @@ static void MMXOP(movd_rm32_r64)(i386_state* cpustate) // Opcode 0f 7e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(movq_rm64_r64)(i386_state* cpustate) // Opcode 0f 7f
+static void __FASTCALL MMXOP(movq_rm64_r64)(i386_state* cpustate) // Opcode 0f 7f
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2294,7 +2294,7 @@ static void MMXOP(movq_rm64_r64)(i386_state* cpustate) // Opcode 0f 7f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pcmpeqb_r64_rm64)(i386_state* cpustate) // Opcode 0f 74
+static void __FASTCALL MMXOP(pcmpeqb_r64_rm64)(i386_state* cpustate) // Opcode 0f 74
 {
        int c;
        MMXPROLOG(cpustate);
@@ -2316,7 +2316,7 @@ static void MMXOP(pcmpeqb_r64_rm64)(i386_state* cpustate) // Opcode 0f 74
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pcmpeqw_r64_rm64)(i386_state* cpustate) // Opcode 0f 75
+static void __FASTCALL MMXOP(pcmpeqw_r64_rm64)(i386_state* cpustate) // Opcode 0f 75
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2341,7 +2341,7 @@ static void MMXOP(pcmpeqw_r64_rm64)(i386_state* cpustate) // Opcode 0f 75
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pcmpeqd_r64_rm64)(i386_state* cpustate) // Opcode 0f 76
+static void __FASTCALL MMXOP(pcmpeqd_r64_rm64)(i386_state* cpustate) // Opcode 0f 76
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2362,7 +2362,7 @@ static void MMXOP(pcmpeqd_r64_rm64)(i386_state* cpustate) // Opcode 0f 76
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pshufw_r64_rm64_i8)(i386_state* cpustate) // Opcode 0f 70
+static void __FASTCALL MMXOP(pshufw_r64_rm64_i8)(i386_state* cpustate) // Opcode 0f 70
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2515,7 +2515,7 @@ static void SSEOP(punpcklqdq_r128_rm128)(i386_state* cpustate)
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(punpcklbw_r64_r64m32)(i386_state* cpustate) // Opcode 0f 60
+static void __FASTCALL MMXOP(punpcklbw_r64_r64m32)(i386_state* cpustate) // Opcode 0f 60
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2551,7 +2551,7 @@ static void MMXOP(punpcklbw_r64_r64m32)(i386_state* cpustate) // Opcode 0f 60
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(punpcklwd_r64_r64m32)(i386_state* cpustate) // Opcode 0f 61
+static void __FASTCALL MMXOP(punpcklwd_r64_r64m32)(i386_state* cpustate) // Opcode 0f 61
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2580,7 +2580,7 @@ static void MMXOP(punpcklwd_r64_r64m32)(i386_state* cpustate) // Opcode 0f 61
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(punpckldq_r64_r64m32)(i386_state* cpustate) // Opcode 0f 62
+static void __FASTCALL MMXOP(punpckldq_r64_r64m32)(i386_state* cpustate) // Opcode 0f 62
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2601,7 +2601,7 @@ static void MMXOP(punpckldq_r64_r64m32)(i386_state* cpustate) // Opcode 0f 62
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(packsswb_r64_rm64)(i386_state* cpustate) // Opcode 0f 63
+static void __FASTCALL MMXOP(packsswb_r64_rm64)(i386_state* cpustate) // Opcode 0f 63
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2634,7 +2634,7 @@ static void MMXOP(packsswb_r64_rm64)(i386_state* cpustate) // Opcode 0f 63
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pcmpgtb_r64_rm64)(i386_state* cpustate) // Opcode 0f 64
+static void __FASTCALL MMXOP(pcmpgtb_r64_rm64)(i386_state* cpustate) // Opcode 0f 64
 {
        int c;
        MMXPROLOG(cpustate);
@@ -2656,7 +2656,7 @@ static void MMXOP(pcmpgtb_r64_rm64)(i386_state* cpustate) // Opcode 0f 64
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pcmpgtw_r64_rm64)(i386_state* cpustate) // Opcode 0f 65
+static void __FASTCALL MMXOP(pcmpgtw_r64_rm64)(i386_state* cpustate) // Opcode 0f 65
 {
        int c;
        MMXPROLOG(cpustate);
@@ -2678,7 +2678,7 @@ static void MMXOP(pcmpgtw_r64_rm64)(i386_state* cpustate) // Opcode 0f 65
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(pcmpgtd_r64_rm64)(i386_state* cpustate) // Opcode 0f 66
+static void __FASTCALL MMXOP(pcmpgtd_r64_rm64)(i386_state* cpustate) // Opcode 0f 66
 {
        int c;
        MMXPROLOG(cpustate);
@@ -2700,7 +2700,7 @@ static void MMXOP(pcmpgtd_r64_rm64)(i386_state* cpustate) // Opcode 0f 66
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(packuswb_r64_rm64)(i386_state* cpustate) // Opcode 0f 67
+static void __FASTCALL MMXOP(packuswb_r64_rm64)(i386_state* cpustate) // Opcode 0f 67
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2737,7 +2737,7 @@ static void MMXOP(packuswb_r64_rm64)(i386_state* cpustate) // Opcode 0f 67
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(punpckhbw_r64_rm64)(i386_state* cpustate) // Opcode 0f 68
+static void __FASTCALL MMXOP(punpckhbw_r64_rm64)(i386_state* cpustate) // Opcode 0f 68
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2770,7 +2770,7 @@ static void MMXOP(punpckhbw_r64_rm64)(i386_state* cpustate) // Opcode 0f 68
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(punpckhwd_r64_rm64)(i386_state* cpustate) // Opcode 0f 69
+static void __FASTCALL MMXOP(punpckhwd_r64_rm64)(i386_state* cpustate) // Opcode 0f 69
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2795,7 +2795,7 @@ static void MMXOP(punpckhwd_r64_rm64)(i386_state* cpustate) // Opcode 0f 69
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(punpckhdq_r64_rm64)(i386_state* cpustate) // Opcode 0f 6a
+static void __FASTCALL MMXOP(punpckhdq_r64_rm64)(i386_state* cpustate) // Opcode 0f 6a
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2816,7 +2816,7 @@ static void MMXOP(punpckhdq_r64_rm64)(i386_state* cpustate) // Opcode 0f 6a
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void MMXOP(packssdw_r64_rm64)(i386_state* cpustate) // Opcode 0f 6b
+static void __FASTCALL MMXOP(packssdw_r64_rm64)(i386_state* cpustate) // Opcode 0f 6b
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -2850,7 +2850,7 @@ static void MMXOP(packssdw_r64_rm64)(i386_state* cpustate) // Opcode 0f 6b
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(group_0fae)(i386_state* cpustate)  // Opcode 0f ae
+static void __FASTCALL SSEOP(group_0fae)(i386_state* cpustate)  // Opcode 0f ae
 {
        UINT8 modm = FETCH(cpustate);
        if( modm == 0xf8 ) {
@@ -2883,7 +2883,7 @@ static void SSEOP(group_0fae)(i386_state* cpustate)  // Opcode 0f ae
        }
 }
 
-static void SSEOP(cvttps2dq_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 5b
+static void __FASTCALL SSEOP(cvttps2dq_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 5b
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2903,7 +2903,7 @@ static void SSEOP(cvttps2dq_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 5b
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtss2sd_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 5a
+static void __FASTCALL SSEOP(cvtss2sd_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 5a
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2917,7 +2917,7 @@ static void SSEOP(cvtss2sd_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvttss2si_r32_r128m32)(i386_state* cpustate) // Opcode f3 0f 2c
+static void __FASTCALL SSEOP(cvttss2si_r32_r128m32)(i386_state* cpustate) // Opcode f3 0f 2c
 {
        INT32 src;
        UINT8 modrm = FETCH(cpustate); // get mordm byte
@@ -2933,7 +2933,7 @@ static void SSEOP(cvttss2si_r32_r128m32)(i386_state* cpustate) // Opcode f3 0f 2
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtss2si_r32_r128m32)(i386_state* cpustate) // Opcode f3 0f 2d
+static void __FASTCALL SSEOP(cvtss2si_r32_r128m32)(i386_state* cpustate) // Opcode f3 0f 2d
 {
        INT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -2949,7 +2949,7 @@ static void SSEOP(cvtss2si_r32_r128m32)(i386_state* cpustate) // Opcode f3 0f 2d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtsi2ss_r128_rm32)(i386_state* cpustate) // Opcode f3 0f 2a
+static void __FASTCALL SSEOP(cvtsi2ss_r128_rm32)(i386_state* cpustate) // Opcode f3 0f 2a
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -2961,7 +2961,7 @@ static void SSEOP(cvtsi2ss_r128_rm32)(i386_state* cpustate) // Opcode f3 0f 2a
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtpi2ps_r128_rm64)(i386_state* cpustate) // Opcode 0f 2a
+static void __FASTCALL SSEOP(cvtpi2ps_r128_rm64)(i386_state* cpustate) // Opcode 0f 2a
 {
        UINT8 modrm = FETCH(cpustate);
        MMXPROLOG(cpustate);
@@ -2978,7 +2978,7 @@ static void SSEOP(cvtpi2ps_r128_rm64)(i386_state* cpustate) // Opcode 0f 2a
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvttps2pi_r64_r128m64)(i386_state* cpustate) // Opcode 0f 2c
+static void __FASTCALL SSEOP(cvttps2pi_r64_r128m64)(i386_state* cpustate) // Opcode 0f 2c
 {
        UINT8 modrm = FETCH(cpustate);
        MMXPROLOG(cpustate);
@@ -2995,7 +2995,7 @@ static void SSEOP(cvttps2pi_r64_r128m64)(i386_state* cpustate) // Opcode 0f 2c
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtps2pi_r64_r128m64)(i386_state* cpustate) // Opcode 0f 2d
+static void __FASTCALL SSEOP(cvtps2pi_r64_r128m64)(i386_state* cpustate) // Opcode 0f 2d
 {
        UINT8 modrm = FETCH(cpustate);
        MMXPROLOG(cpustate);
@@ -3012,7 +3012,7 @@ static void SSEOP(cvtps2pi_r64_r128m64)(i386_state* cpustate) // Opcode 0f 2d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtps2pd_r128_r128m64)(i386_state* cpustate) // Opcode 0f 5a
+static void __FASTCALL SSEOP(cvtps2pd_r128_r128m64)(i386_state* cpustate) // Opcode 0f 5a
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3028,7 +3028,7 @@ static void SSEOP(cvtps2pd_r128_r128m64)(i386_state* cpustate) // Opcode 0f 5a
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtdq2ps_r128_rm128)(i386_state* cpustate) // Opcode 0f 5b
+static void __FASTCALL SSEOP(cvtdq2ps_r128_rm128)(i386_state* cpustate) // Opcode 0f 5b
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3048,7 +3048,7 @@ static void SSEOP(cvtdq2ps_r128_rm128)(i386_state* cpustate) // Opcode 0f 5b
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtdq2pd_r128_r128m64)(i386_state* cpustate) // Opcode f3 0f e6
+static void __FASTCALL SSEOP(cvtdq2pd_r128_r128m64)(i386_state* cpustate) // Opcode f3 0f e6
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3064,7 +3064,7 @@ static void SSEOP(cvtdq2pd_r128_r128m64)(i386_state* cpustate) // Opcode f3 0f e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movss_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 10
+static void __FASTCALL SSEOP(movss_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 10
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3076,7 +3076,7 @@ static void SSEOP(movss_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 10
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movss_rm128_r128)(i386_state* cpustate) // Opcode f3 0f 11
+static void __FASTCALL SSEOP(movss_rm128_r128)(i386_state* cpustate) // Opcode f3 0f 11
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3088,7 +3088,7 @@ static void SSEOP(movss_rm128_r128)(i386_state* cpustate) // Opcode f3 0f 11
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movsldup_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 12
+static void __FASTCALL SSEOP(movsldup_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 12
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3108,7 +3108,7 @@ static void SSEOP(movsldup_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 12
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movshdup_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 16
+static void __FASTCALL SSEOP(movshdup_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 16
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3128,7 +3128,7 @@ static void SSEOP(movshdup_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 16
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movaps_r128_rm128)(i386_state* cpustate) // Opcode 0f 28
+static void __FASTCALL SSEOP(movaps_r128_rm128)(i386_state* cpustate) // Opcode 0f 28
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3140,7 +3140,7 @@ static void SSEOP(movaps_r128_rm128)(i386_state* cpustate) // Opcode 0f 28
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movaps_rm128_r128)(i386_state* cpustate) // Opcode 0f 29
+static void __FASTCALL SSEOP(movaps_rm128_r128)(i386_state* cpustate) // Opcode 0f 29
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3152,7 +3152,7 @@ static void SSEOP(movaps_rm128_r128)(i386_state* cpustate) // Opcode 0f 29
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movups_r128_rm128)(i386_state* cpustate) // Opcode 0f 10
+static void __FASTCALL SSEOP(movups_r128_rm128)(i386_state* cpustate) // Opcode 0f 10
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3164,7 +3164,7 @@ static void SSEOP(movups_r128_rm128)(i386_state* cpustate) // Opcode 0f 10
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movupd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 10
+static void __FASTCALL SSEOP(movupd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 10
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3176,7 +3176,7 @@ static void SSEOP(movupd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 10
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movups_rm128_r128)(i386_state* cpustate) // Opcode 0f 11
+static void __FASTCALL SSEOP(movups_rm128_r128)(i386_state* cpustate) // Opcode 0f 11
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3188,7 +3188,7 @@ static void SSEOP(movups_rm128_r128)(i386_state* cpustate) // Opcode 0f 11
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movupd_rm128_r128)(i386_state* cpustate) // Opcode 66 0f 11
+static void __FASTCALL SSEOP(movupd_rm128_r128)(i386_state* cpustate) // Opcode 66 0f 11
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3200,7 +3200,7 @@ static void SSEOP(movupd_rm128_r128)(i386_state* cpustate) // Opcode 66 0f 11
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movlps_r128_m64)(i386_state* cpustate) // Opcode 0f 12
+static void __FASTCALL SSEOP(movlps_r128_m64)(i386_state* cpustate) // Opcode 0f 12
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3215,7 +3215,7 @@ static void SSEOP(movlps_r128_m64)(i386_state* cpustate) // Opcode 0f 12
        }
 }
 
-static void SSEOP(movlpd_r128_m64)(i386_state* cpustate) // Opcode 66 0f 12
+static void __FASTCALL SSEOP(movlpd_r128_m64)(i386_state* cpustate) // Opcode 66 0f 12
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3228,7 +3228,7 @@ static void SSEOP(movlpd_r128_m64)(i386_state* cpustate) // Opcode 66 0f 12
        }
 }
 
-static void SSEOP(movlps_m64_r128)(i386_state* cpustate) // Opcode 0f 13
+static void __FASTCALL SSEOP(movlps_m64_r128)(i386_state* cpustate) // Opcode 0f 13
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3241,7 +3241,7 @@ static void SSEOP(movlps_m64_r128)(i386_state* cpustate) // Opcode 0f 13
        }
 }
 
-static void SSEOP(movlpd_m64_r128)(i386_state* cpustate) // Opcode 66 0f 13
+static void __FASTCALL SSEOP(movlpd_m64_r128)(i386_state* cpustate) // Opcode 66 0f 13
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3254,7 +3254,7 @@ static void SSEOP(movlpd_m64_r128)(i386_state* cpustate) // Opcode 66 0f 13
        }
 }
 
-static void SSEOP(movhps_r128_m64)(i386_state* cpustate) // Opcode 0f 16
+static void __FASTCALL SSEOP(movhps_r128_m64)(i386_state* cpustate) // Opcode 0f 16
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3269,7 +3269,7 @@ static void SSEOP(movhps_r128_m64)(i386_state* cpustate) // Opcode 0f 16
        }
 }
 
-static void SSEOP(movhpd_r128_m64)(i386_state* cpustate) // Opcode 66 0f 16
+static void __FASTCALL SSEOP(movhpd_r128_m64)(i386_state* cpustate) // Opcode 66 0f 16
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3283,7 +3283,7 @@ static void SSEOP(movhpd_r128_m64)(i386_state* cpustate) // Opcode 66 0f 16
        }
 }
 
-static void SSEOP(movhps_m64_r128)(i386_state* cpustate) // Opcode 0f 17
+static void __FASTCALL SSEOP(movhps_m64_r128)(i386_state* cpustate) // Opcode 0f 17
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3296,7 +3296,7 @@ static void SSEOP(movhps_m64_r128)(i386_state* cpustate) // Opcode 0f 17
        }
 }
 
-static void SSEOP(movhpd_m64_r128)(i386_state* cpustate) // Opcode 66 0f 17
+static void __FASTCALL SSEOP(movhpd_m64_r128)(i386_state* cpustate) // Opcode 66 0f 17
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3309,7 +3309,7 @@ static void SSEOP(movhpd_m64_r128)(i386_state* cpustate) // Opcode 66 0f 17
        }
 }
 
-static void SSEOP(movntps_m128_r128)(i386_state* cpustate) // Opcode 0f 2b
+static void __FASTCALL SSEOP(movntps_m128_r128)(i386_state* cpustate) // Opcode 0f 2b
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3323,7 +3323,7 @@ static void SSEOP(movntps_m128_r128)(i386_state* cpustate) // Opcode 0f 2b
        }
 }
 
-static void SSEOP(movmskps_r16_r128)(i386_state* cpustate) // Opcode 0f 50
+static void __FASTCALL SSEOP(movmskps_r16_r128)(i386_state* cpustate) // Opcode 0f 50
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3337,7 +3337,7 @@ static void SSEOP(movmskps_r16_r128)(i386_state* cpustate) // Opcode 0f 50
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movmskps_r32_r128)(i386_state* cpustate) // Opcode 0f 50
+static void __FASTCALL SSEOP(movmskps_r32_r128)(i386_state* cpustate) // Opcode 0f 50
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3351,7 +3351,7 @@ static void SSEOP(movmskps_r32_r128)(i386_state* cpustate) // Opcode 0f 50
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movmskpd_r32_r128)(i386_state* cpustate) // Opcode 66 0f 50
+static void __FASTCALL SSEOP(movmskpd_r32_r128)(i386_state* cpustate) // Opcode 66 0f 50
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3363,7 +3363,7 @@ static void SSEOP(movmskpd_r32_r128)(i386_state* cpustate) // Opcode 66 0f 50
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movq2dq_r128_r64)(i386_state* cpustate) // Opcode f3 0f d6
+static void __FASTCALL SSEOP(movq2dq_r128_r64)(i386_state* cpustate) // Opcode f3 0f d6
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -3374,7 +3374,7 @@ static void SSEOP(movq2dq_r128_r64)(i386_state* cpustate) // Opcode f3 0f d6
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movdqu_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 6f
+static void __FASTCALL SSEOP(movdqu_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 6f
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -3388,7 +3388,7 @@ static void SSEOP(movdqu_r128_rm128)(i386_state* cpustate) // Opcode f3 0f 6f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movdqu_rm128_r128)(i386_state* cpustate) // Opcode f3 0f 7f
+static void __FASTCALL SSEOP(movdqu_rm128_r128)(i386_state* cpustate) // Opcode f3 0f 7f
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -3402,7 +3402,7 @@ static void SSEOP(movdqu_rm128_r128)(i386_state* cpustate) // Opcode f3 0f 7f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movd_m128_rm32)(i386_state* cpustate) // Opcode 66 0f 6e
+static void __FASTCALL SSEOP(movd_m128_rm32)(i386_state* cpustate) // Opcode 66 0f 6e
 {
        UINT8 modrm = FETCH(cpustate);
        if (modrm >= 0xc0) {
@@ -3417,7 +3417,7 @@ static void SSEOP(movd_m128_rm32)(i386_state* cpustate) // Opcode 66 0f 6e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movdqa_m128_rm128)(i386_state* cpustate) // Opcode 66 0f 6f
+static void __FASTCALL SSEOP(movdqa_m128_rm128)(i386_state* cpustate) // Opcode 66 0f 6f
 {
        UINT8 modrm = FETCH(cpustate);
        if (modrm >= 0xc0) {
@@ -3431,7 +3431,7 @@ static void SSEOP(movdqa_m128_rm128)(i386_state* cpustate) // Opcode 66 0f 6f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movq_r128_r128m64)(i386_state* cpustate) // Opcode f3 0f 7e
+static void __FASTCALL SSEOP(movq_r128_r128m64)(i386_state* cpustate) // Opcode f3 0f 7e
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -3446,7 +3446,7 @@ static void SSEOP(movq_r128_r128m64)(i386_state* cpustate) // Opcode f3 0f 7e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movd_rm32_r128)(i386_state* cpustate) // Opcode 66 0f 7e
+static void __FASTCALL SSEOP(movd_rm32_r128)(i386_state* cpustate) // Opcode 66 0f 7e
 {
        UINT8 modrm = FETCH(cpustate);
        if (modrm >= 0xc0) {
@@ -3459,7 +3459,7 @@ static void SSEOP(movd_rm32_r128)(i386_state* cpustate) // Opcode 66 0f 7e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movdqa_rm128_r128)(i386_state* cpustate) // Opcode 66 0f 7f
+static void __FASTCALL SSEOP(movdqa_rm128_r128)(i386_state* cpustate) // Opcode 66 0f 7f
 {
        UINT8 modrm = FETCH(cpustate);
        if (modrm >= 0xc0) {
@@ -3473,7 +3473,7 @@ static void SSEOP(movdqa_rm128_r128)(i386_state* cpustate) // Opcode 66 0f 7f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmovmskb_r16_r64)(i386_state* cpustate) // Opcode 0f d7
+static void __FASTCALL SSEOP(pmovmskb_r16_r64)(i386_state* cpustate) // Opcode 0f d7
 {
        //MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -3492,7 +3492,7 @@ static void SSEOP(pmovmskb_r16_r64)(i386_state* cpustate) // Opcode 0f d7
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmovmskb_r32_r64)(i386_state* cpustate) // Opcode 0f d7
+static void __FASTCALL SSEOP(pmovmskb_r32_r64)(i386_state* cpustate) // Opcode 0f d7
 {
        //MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -3511,7 +3511,7 @@ static void SSEOP(pmovmskb_r32_r64)(i386_state* cpustate) // Opcode 0f d7
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmovmskb_r32_r128)(i386_state* cpustate) // Opcode 66 0f d7
+static void __FASTCALL SSEOP(pmovmskb_r32_r128)(i386_state* cpustate) // Opcode 66 0f d7
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3537,7 +3537,7 @@ static void SSEOP(pmovmskb_r32_r128)(i386_state* cpustate) // Opcode 66 0f d7
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(xorps)(i386_state* cpustate) // Opcode 0f 57
+static void __FASTCALL SSEOP(xorps)(i386_state* cpustate) // Opcode 0f 57
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3557,7 +3557,7 @@ static void SSEOP(xorps)(i386_state* cpustate) // Opcode 0f 57
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(xorpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 57
+static void __FASTCALL SSEOP(xorpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 57
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3573,7 +3573,7 @@ static void SSEOP(xorpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 57
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(addps)(i386_state* cpustate) // Opcode 0f 58
+static void __FASTCALL SSEOP(addps)(i386_state* cpustate) // Opcode 0f 58
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3593,7 +3593,7 @@ static void SSEOP(addps)(i386_state* cpustate) // Opcode 0f 58
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(sqrtps_r128_rm128)(i386_state* cpustate) // Opcode 0f 51
+static void __FASTCALL SSEOP(sqrtps_r128_rm128)(i386_state* cpustate) // Opcode 0f 51
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3613,7 +3613,7 @@ static void SSEOP(sqrtps_r128_rm128)(i386_state* cpustate) // Opcode 0f 51
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(rsqrtps_r128_rm128)(i386_state* cpustate) // Opcode 0f 52
+static void __FASTCALL SSEOP(rsqrtps_r128_rm128)(i386_state* cpustate) // Opcode 0f 52
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3633,7 +3633,7 @@ static void SSEOP(rsqrtps_r128_rm128)(i386_state* cpustate) // Opcode 0f 52
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(rcpps_r128_rm128)(i386_state* cpustate) // Opcode 0f 53
+static void __FASTCALL SSEOP(rcpps_r128_rm128)(i386_state* cpustate) // Opcode 0f 53
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3653,7 +3653,7 @@ static void SSEOP(rcpps_r128_rm128)(i386_state* cpustate) // Opcode 0f 53
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(andps_r128_rm128)(i386_state* cpustate) // Opcode 0f 54
+static void __FASTCALL SSEOP(andps_r128_rm128)(i386_state* cpustate) // Opcode 0f 54
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3669,7 +3669,7 @@ static void SSEOP(andps_r128_rm128)(i386_state* cpustate) // Opcode 0f 54
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(andpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 54
+static void __FASTCALL SSEOP(andpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 54
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3685,7 +3685,7 @@ static void SSEOP(andpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 54
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(andnps_r128_rm128)(i386_state* cpustate) // Opcode 0f 55
+static void __FASTCALL SSEOP(andnps_r128_rm128)(i386_state* cpustate) // Opcode 0f 55
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3701,7 +3701,7 @@ static void SSEOP(andnps_r128_rm128)(i386_state* cpustate) // Opcode 0f 55
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(andnpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 55
+static void __FASTCALL SSEOP(andnpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 55
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3717,7 +3717,7 @@ static void SSEOP(andnpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 55
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(orps_r128_rm128)(i386_state* cpustate) // Opcode 0f 56
+static void __FASTCALL SSEOP(orps_r128_rm128)(i386_state* cpustate) // Opcode 0f 56
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3733,7 +3733,7 @@ static void SSEOP(orps_r128_rm128)(i386_state* cpustate) // Opcode 0f 56
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(orpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 56
+static void __FASTCALL SSEOP(orpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 56
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3749,7 +3749,7 @@ static void SSEOP(orpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 56
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(mulps)(i386_state* cpustate) // Opcode 0f 59 ????
+static void __FASTCALL SSEOP(mulps)(i386_state* cpustate) // Opcode 0f 59 ????
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3769,7 +3769,7 @@ static void SSEOP(mulps)(i386_state* cpustate) // Opcode 0f 59 ????
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(subps)(i386_state* cpustate) // Opcode 0f 5c
+static void __FASTCALL SSEOP(subps)(i386_state* cpustate) // Opcode 0f 5c
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3815,7 +3815,7 @@ INLINE double sse_min_double(double src1, double src2)
        return src2;
 }
 
-static void SSEOP(minps)(i386_state* cpustate) // Opcode 0f 5d
+static void __FASTCALL SSEOP(minps)(i386_state* cpustate) // Opcode 0f 5d
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3835,7 +3835,7 @@ static void SSEOP(minps)(i386_state* cpustate) // Opcode 0f 5d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(divps)(i386_state* cpustate) // Opcode 0f 5e
+static void __FASTCALL SSEOP(divps)(i386_state* cpustate) // Opcode 0f 5e
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3881,7 +3881,7 @@ INLINE double sse_max_double(double src1, double src2)
        return src2;
 }
 
-static void SSEOP(maxps)(i386_state* cpustate) // Opcode 0f 5f
+static void __FASTCALL SSEOP(maxps)(i386_state* cpustate) // Opcode 0f 5f
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3901,7 +3901,7 @@ static void SSEOP(maxps)(i386_state* cpustate) // Opcode 0f 5f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(maxss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 5f
+static void __FASTCALL SSEOP(maxss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 5f
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3915,7 +3915,7 @@ static void SSEOP(maxss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 5f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(addss)(i386_state* cpustate) // Opcode f3 0f 58
+static void __FASTCALL SSEOP(addss)(i386_state* cpustate) // Opcode f3 0f 58
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3929,7 +3929,7 @@ static void SSEOP(addss)(i386_state* cpustate) // Opcode f3 0f 58
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(subss)(i386_state* cpustate) // Opcode f3 0f 5c
+static void __FASTCALL SSEOP(subss)(i386_state* cpustate) // Opcode f3 0f 5c
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3943,7 +3943,7 @@ static void SSEOP(subss)(i386_state* cpustate) // Opcode f3 0f 5c
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(mulss)(i386_state* cpustate) // Opcode f3 0f 5e
+static void __FASTCALL SSEOP(mulss)(i386_state* cpustate) // Opcode f3 0f 5e
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3957,7 +3957,7 @@ static void SSEOP(mulss)(i386_state* cpustate) // Opcode f3 0f 5e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(divss)(i386_state* cpustate) // Opcode 0f 59
+static void __FASTCALL SSEOP(divss)(i386_state* cpustate) // Opcode 0f 59
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3971,7 +3971,7 @@ static void SSEOP(divss)(i386_state* cpustate) // Opcode 0f 59
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(rcpss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 53
+static void __FASTCALL SSEOP(rcpss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 53
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3985,7 +3985,7 @@ static void SSEOP(rcpss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 53
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(sqrtss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 51
+static void __FASTCALL SSEOP(sqrtss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 51
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -3999,7 +3999,7 @@ static void SSEOP(sqrtss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 51
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(rsqrtss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 52
+static void __FASTCALL SSEOP(rsqrtss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 52
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -4013,7 +4013,7 @@ static void SSEOP(rsqrtss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 52
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(minss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 5d
+static void __FASTCALL SSEOP(minss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 5d
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -4027,7 +4027,7 @@ static void SSEOP(minss_r128_r128m32)(i386_state* cpustate) // Opcode f3 0f 5d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(comiss_r128_r128m32)(i386_state* cpustate) // Opcode 0f 2f
+static void __FASTCALL SSEOP(comiss_r128_r128m32)(i386_state* cpustate) // Opcode 0f 2f
 {
        float32 a,b;
        UINT8 modrm = FETCH(cpustate);
@@ -4064,7 +4064,7 @@ static void SSEOP(comiss_r128_r128m32)(i386_state* cpustate) // Opcode 0f 2f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(comisd_r128_r128m64)(i386_state* cpustate) // Opcode 66 0f 2f
+static void __FASTCALL SSEOP(comisd_r128_r128m64)(i386_state* cpustate) // Opcode 66 0f 2f
 {
        float64 a,b;
        UINT8 modrm = FETCH(cpustate);
@@ -4101,7 +4101,7 @@ static void SSEOP(comisd_r128_r128m64)(i386_state* cpustate) // Opcode 66 0f 2f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(ucomiss_r128_r128m32)(i386_state* cpustate) // Opcode 0f 2e
+static void __FASTCALL SSEOP(ucomiss_r128_r128m32)(i386_state* cpustate) // Opcode 0f 2e
 {
        float32 a,b;
        UINT8 modrm = FETCH(cpustate);
@@ -4138,7 +4138,7 @@ static void SSEOP(ucomiss_r128_r128m32)(i386_state* cpustate) // Opcode 0f 2e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(ucomisd_r128_r128m64)(i386_state* cpustate) // Opcode 66 0f 2e
+static void __FASTCALL SSEOP(ucomisd_r128_r128m64)(i386_state* cpustate) // Opcode 66 0f 2e
 {
        float64 a,b;
        UINT8 modrm = FETCH(cpustate);
@@ -4175,7 +4175,7 @@ static void SSEOP(ucomisd_r128_r128m64)(i386_state* cpustate) // Opcode 66 0f 2e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(shufps)(i386_state* cpustate) // Opcode 0f c6
+static void __FASTCALL SSEOP(shufps)(i386_state* cpustate) // Opcode 0f c6
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 sel = FETCH(cpustate);
@@ -4212,7 +4212,7 @@ static void SSEOP(shufps)(i386_state* cpustate) // Opcode 0f c6
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(shufpd_r128_rm128_i8)(i386_state* cpustate) // Opcode 66 0f c6
+static void __FASTCALL SSEOP(shufpd_r128_rm128_i8)(i386_state* cpustate) // Opcode 66 0f c6
 {
        UINT8 modrm = FETCH(cpustate);
        UINT8 sel = FETCH(cpustate);
@@ -4240,7 +4240,7 @@ static void SSEOP(shufpd_r128_rm128_i8)(i386_state* cpustate) // Opcode 66 0f c6
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(unpcklps_r128_rm128)(i386_state* cpustate) // Opcode 0f 14
+static void __FASTCALL SSEOP(unpcklps_r128_rm128)(i386_state* cpustate) // Opcode 0f 14
 {
        UINT8 modrm = FETCH(cpustate);
        int s,d;
@@ -4268,7 +4268,7 @@ static void SSEOP(unpcklps_r128_rm128)(i386_state* cpustate) // Opcode 0f 14
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(unpcklpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 14
+static void __FASTCALL SSEOP(unpcklpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 14
 {
        UINT8 modrm = FETCH(cpustate);
        int s,d;
@@ -4287,7 +4287,7 @@ static void SSEOP(unpcklpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 14
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(unpckhps_r128_rm128)(i386_state* cpustate) // Opcode 0f 15
+static void __FASTCALL SSEOP(unpckhps_r128_rm128)(i386_state* cpustate) // Opcode 0f 15
 {
        UINT8 modrm = FETCH(cpustate);
        int s,d;
@@ -4317,7 +4317,7 @@ static void SSEOP(unpckhps_r128_rm128)(i386_state* cpustate) // Opcode 0f 15
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(unpckhpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 15
+static void __FASTCALL SSEOP(unpckhpd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 15
 {
        UINT8 modrm = FETCH(cpustate);
        int s,d;
@@ -4360,7 +4360,7 @@ INLINE bool sse_isdoubleunordered(double op1, double op2)
        return !((op1 != op1) || (op1 != op2));
 }
 
-static void SSEOP(predicate_compare_single)(UINT8 imm8, XMM_REG d, XMM_REG s)
+static void __FASTCALL SSEOP(predicate_compare_single)(UINT8 imm8, XMM_REG d, XMM_REG s)
 {
        switch (imm8 & 7)
        {
@@ -4415,7 +4415,7 @@ static void SSEOP(predicate_compare_single)(UINT8 imm8, XMM_REG d, XMM_REG s)
        }
 }
 
-static void SSEOP(predicate_compare_double)(UINT8 imm8, XMM_REG d, XMM_REG s)
+static void __FASTCALL SSEOP(predicate_compare_double)(UINT8 imm8, XMM_REG d, XMM_REG s)
 {
        switch (imm8 & 7)
        {
@@ -4454,7 +4454,7 @@ static void SSEOP(predicate_compare_double)(UINT8 imm8, XMM_REG d, XMM_REG s)
        }
 }
 
-static void SSEOP(predicate_compare_single_scalar)(UINT8 imm8, XMM_REG d, XMM_REG s)
+static void __FASTCALL SSEOP(predicate_compare_single_scalar)(UINT8 imm8, XMM_REG d, XMM_REG s)
 {
        switch (imm8 & 7)
        {
@@ -4485,7 +4485,7 @@ static void SSEOP(predicate_compare_single_scalar)(UINT8 imm8, XMM_REG d, XMM_RE
        }
 }
 
-static void SSEOP(predicate_compare_double_scalar)(UINT8 imm8, XMM_REG d, XMM_REG s)
+static void __FASTCALL SSEOP(predicate_compare_double_scalar)(UINT8 imm8, XMM_REG d, XMM_REG s)
 {
        switch (imm8 & 7)
        {
@@ -4516,7 +4516,7 @@ static void SSEOP(predicate_compare_double_scalar)(UINT8 imm8, XMM_REG d, XMM_RE
        }
 }
 
-static void SSEOP(cmpps_r128_rm128_i8)(i386_state* cpustate) // Opcode 0f c2
+static void __FASTCALL SSEOP(cmpps_r128_rm128_i8)(i386_state* cpustate) // Opcode 0f c2
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -4537,7 +4537,7 @@ static void SSEOP(cmpps_r128_rm128_i8)(i386_state* cpustate) // Opcode 0f c2
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cmppd_r128_rm128_i8)(i386_state* cpustate) // Opcode 66 0f c2
+static void __FASTCALL SSEOP(cmppd_r128_rm128_i8)(i386_state* cpustate) // Opcode 66 0f c2
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -4558,7 +4558,7 @@ static void SSEOP(cmppd_r128_rm128_i8)(i386_state* cpustate) // Opcode 66 0f c2
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cmpss_r128_r128m32_i8)(i386_state* cpustate) // Opcode f3 0f c2
+static void __FASTCALL SSEOP(cmpss_r128_r128m32_i8)(i386_state* cpustate) // Opcode f3 0f c2
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -4579,7 +4579,7 @@ static void SSEOP(cmpss_r128_r128m32_i8)(i386_state* cpustate) // Opcode f3 0f c
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pinsrw_r64_r16m16_i8)(i386_state* cpustate) // Opcode 0f c4, 16bit register
+static void __FASTCALL SSEOP(pinsrw_r64_r16m16_i8)(i386_state* cpustate) // Opcode 0f c4, 16bit register
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -4602,7 +4602,7 @@ static void SSEOP(pinsrw_r64_r16m16_i8)(i386_state* cpustate) // Opcode 0f c4, 1
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pinsrw_r64_r32m16_i8)(i386_state* cpustate) // Opcode 0f c4, 32bit register
+static void __FASTCALL SSEOP(pinsrw_r64_r32m16_i8)(i386_state* cpustate) // Opcode 0f c4, 32bit register
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -4619,7 +4619,7 @@ static void SSEOP(pinsrw_r64_r32m16_i8)(i386_state* cpustate) // Opcode 0f c4, 3
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pinsrw_r128_r32m16_i8)(i386_state* cpustate) // Opcode 66 0f c4
+static void __FASTCALL SSEOP(pinsrw_r128_r32m16_i8)(i386_state* cpustate) // Opcode 66 0f c4
 {
        UINT8 modrm = FETCH(cpustate);
        if (modrm >= 0xc0) {
@@ -4636,7 +4636,7 @@ static void SSEOP(pinsrw_r128_r32m16_i8)(i386_state* cpustate) // Opcode 66 0f c
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pextrw_r16_r64_i8)(i386_state* cpustate) // Opcode 0f c5
+static void __FASTCALL SSEOP(pextrw_r16_r64_i8)(i386_state* cpustate) // Opcode 0f c5
 {
        //MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -4653,7 +4653,7 @@ static void SSEOP(pextrw_r16_r64_i8)(i386_state* cpustate) // Opcode 0f c5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pextrw_r32_r64_i8)(i386_state* cpustate) // Opcode 0f c5
+static void __FASTCALL SSEOP(pextrw_r32_r64_i8)(i386_state* cpustate) // Opcode 0f c5
 {
        //MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -4667,7 +4667,7 @@ static void SSEOP(pextrw_r32_r64_i8)(i386_state* cpustate) // Opcode 0f c5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pextrw_reg_r128_i8)(i386_state* cpustate) // Opcode 66 0f c5
+static void __FASTCALL SSEOP(pextrw_reg_r128_i8)(i386_state* cpustate) // Opcode 66 0f c5
 {
        UINT8 modrm = FETCH(cpustate);
        if (modrm >= 0xc0) {
@@ -4681,7 +4681,7 @@ static void SSEOP(pextrw_reg_r128_i8)(i386_state* cpustate) // Opcode 66 0f c5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pminub_r64_rm64)(i386_state* cpustate) // Opcode 0f da
+static void __FASTCALL SSEOP(pminub_r64_rm64)(i386_state* cpustate) // Opcode 0f da
 {
        int n;
        MMXPROLOG(cpustate);
@@ -4699,7 +4699,7 @@ static void SSEOP(pminub_r64_rm64)(i386_state* cpustate) // Opcode 0f da
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pminub_r128_rm128)(i386_state* cpustate) // Opcode 66 0f da
+static void __FASTCALL SSEOP(pminub_r128_rm128)(i386_state* cpustate) // Opcode 66 0f da
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -4715,7 +4715,7 @@ static void SSEOP(pminub_r128_rm128)(i386_state* cpustate) // Opcode 66 0f da
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmaxub_r64_rm64)(i386_state* cpustate) // Opcode 0f de
+static void __FASTCALL SSEOP(pmaxub_r64_rm64)(i386_state* cpustate) // Opcode 0f de
 {
        int n;
        MMXPROLOG(cpustate);
@@ -4733,7 +4733,7 @@ static void SSEOP(pmaxub_r64_rm64)(i386_state* cpustate) // Opcode 0f de
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pavgb_r64_rm64)(i386_state* cpustate) // Opcode 0f e0
+static void __FASTCALL SSEOP(pavgb_r64_rm64)(i386_state* cpustate) // Opcode 0f e0
 {
        int n;
        MMXPROLOG(cpustate);
@@ -4751,7 +4751,7 @@ static void SSEOP(pavgb_r64_rm64)(i386_state* cpustate) // Opcode 0f e0
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pavgw_r64_rm64)(i386_state* cpustate) // Opcode 0f e3
+static void __FASTCALL SSEOP(pavgw_r64_rm64)(i386_state* cpustate) // Opcode 0f e3
 {
        int n;
        MMXPROLOG(cpustate);
@@ -4769,7 +4769,7 @@ static void SSEOP(pavgw_r64_rm64)(i386_state* cpustate) // Opcode 0f e3
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmulhuw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e4
+static void __FASTCALL SSEOP(pmulhuw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e4
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -4790,7 +4790,7 @@ static void SSEOP(pmulhuw_r64_rm64)(i386_state* cpustate)  // Opcode 0f e4
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pminsw_r64_rm64)(i386_state* cpustate) // Opcode 0f ea
+static void __FASTCALL SSEOP(pminsw_r64_rm64)(i386_state* cpustate) // Opcode 0f ea
 {
        int n;
        MMXPROLOG(cpustate);
@@ -4808,7 +4808,7 @@ static void SSEOP(pminsw_r64_rm64)(i386_state* cpustate) // Opcode 0f ea
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmaxsw_r64_rm64)(i386_state* cpustate) // Opcode 0f ee
+static void __FASTCALL SSEOP(pmaxsw_r64_rm64)(i386_state* cpustate) // Opcode 0f ee
 {
        int n;
        MMXPROLOG(cpustate);
@@ -4826,7 +4826,7 @@ static void SSEOP(pmaxsw_r64_rm64)(i386_state* cpustate) // Opcode 0f ee
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmuludq_r64_rm64)(i386_state* cpustate) // Opcode 0f f4
+static void __FASTCALL SSEOP(pmuludq_r64_rm64)(i386_state* cpustate) // Opcode 0f f4
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -4841,7 +4841,7 @@ static void SSEOP(pmuludq_r64_rm64)(i386_state* cpustate) // Opcode 0f f4
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmuludq_r128_rm128)(i386_state* cpustate) // Opcode 66 0f f4
+static void __FASTCALL SSEOP(pmuludq_r128_rm128)(i386_state* cpustate) // Opcode 66 0f f4
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -4857,7 +4857,7 @@ static void SSEOP(pmuludq_r128_rm128)(i386_state* cpustate) // Opcode 66 0f f4
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psadbw_r64_rm64)(i386_state* cpustate) // Opcode 0f f6
+static void __FASTCALL SSEOP(psadbw_r64_rm64)(i386_state* cpustate) // Opcode 0f f6
 {
        int n;
        INT32 temp;
@@ -4880,7 +4880,7 @@ static void SSEOP(psadbw_r64_rm64)(i386_state* cpustate) // Opcode 0f f6
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psubq_r64_rm64)(i386_state* cpustate)  // Opcode 0f fb
+static void __FASTCALL SSEOP(psubq_r64_rm64)(i386_state* cpustate)  // Opcode 0f fb
 {
        MMXPROLOG(cpustate);
        UINT8 modrm = FETCH(cpustate);
@@ -4895,7 +4895,7 @@ static void SSEOP(psubq_r64_rm64)(i386_state* cpustate)  // Opcode 0f fb
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psubq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fb
+static void __FASTCALL SSEOP(psubq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fb
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -4911,7 +4911,7 @@ static void SSEOP(psubq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fb
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pshufd_r128_rm128_i8)(i386_state* cpustate) // Opcode 66 0f 70
+static void __FASTCALL SSEOP(pshufd_r128_rm128_i8)(i386_state* cpustate) // Opcode 66 0f 70
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -4940,7 +4940,7 @@ static void SSEOP(pshufd_r128_rm128_i8)(i386_state* cpustate) // Opcode 66 0f 70
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pshuflw_r128_rm128_i8)(i386_state* cpustate) // Opcode f2 0f 70
+static void __FASTCALL SSEOP(pshuflw_r128_rm128_i8)(i386_state* cpustate) // Opcode f2 0f 70
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -4970,7 +4970,7 @@ static void SSEOP(pshuflw_r128_rm128_i8)(i386_state* cpustate) // Opcode f2 0f 7
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pshufhw_r128_rm128_i8)(i386_state* cpustate) // Opcode f3 0f 70
+static void __FASTCALL SSEOP(pshufhw_r128_rm128_i8)(i386_state* cpustate) // Opcode f3 0f 70
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5000,7 +5000,7 @@ static void SSEOP(pshufhw_r128_rm128_i8)(i386_state* cpustate) // Opcode f3 0f 7
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(packsswb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 63
+static void __FASTCALL SSEOP(packsswb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 63
 {
        UINT8 modrm = FETCH(cpustate);
        if (modrm >= 0xc0) {
@@ -5028,7 +5028,7 @@ static void SSEOP(packsswb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 63
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(packssdw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 6b
+static void __FASTCALL SSEOP(packssdw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 6b
 {
        UINT8 modrm = FETCH(cpustate);
        if (modrm >= 0xc0) {
@@ -5064,7 +5064,7 @@ static void SSEOP(packssdw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 6b
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pcmpgtb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 64
+static void __FASTCALL SSEOP(pcmpgtb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 64
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5084,7 +5084,7 @@ static void SSEOP(pcmpgtb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 64
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pcmpgtw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 65
+static void __FASTCALL SSEOP(pcmpgtw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 65
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5104,7 +5104,7 @@ static void SSEOP(pcmpgtw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 65
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pcmpgtd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 66
+static void __FASTCALL SSEOP(pcmpgtd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 66
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5124,7 +5124,7 @@ static void SSEOP(pcmpgtd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 66
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(packuswb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 67
+static void __FASTCALL SSEOP(packuswb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 67
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5151,7 +5151,7 @@ static void SSEOP(packuswb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 67
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(punpckhbw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 68
+static void __FASTCALL SSEOP(punpckhbw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 68
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5177,7 +5177,7 @@ static void SSEOP(punpckhbw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 68
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(punpckhwd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 69
+static void __FASTCALL SSEOP(punpckhwd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 69
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5203,7 +5203,7 @@ static void SSEOP(punpckhwd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 69
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(unpckhdq_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 6a
+static void __FASTCALL SSEOP(unpckhdq_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 6a
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5229,7 +5229,7 @@ static void SSEOP(unpckhdq_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 6a
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(punpckhqdq_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 6d
+static void __FASTCALL SSEOP(punpckhqdq_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 6d
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5251,7 +5251,7 @@ static void SSEOP(punpckhqdq_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 6
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pcmpeqb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 74
+static void __FASTCALL SSEOP(pcmpeqb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 74
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5271,7 +5271,7 @@ static void SSEOP(pcmpeqb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 74
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pcmpeqw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 75
+static void __FASTCALL SSEOP(pcmpeqw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 75
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5291,7 +5291,7 @@ static void SSEOP(pcmpeqw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 75
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pcmpeqd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 76
+static void __FASTCALL SSEOP(pcmpeqd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 76
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5311,7 +5311,7 @@ static void SSEOP(pcmpeqd_r128_rm128)(i386_state* cpustate) // Opcode 66 0f 76
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(paddq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d4
+static void __FASTCALL SSEOP(paddq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d4
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5331,7 +5331,7 @@ static void SSEOP(paddq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d4
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmullw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d5
+static void __FASTCALL SSEOP(pmullw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d5
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5352,7 +5352,7 @@ static void SSEOP(pmullw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(paddb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fc
+static void __FASTCALL SSEOP(paddb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fc
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5368,7 +5368,7 @@ static void SSEOP(paddb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fc
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(paddw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fd
+static void __FASTCALL SSEOP(paddw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fd
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5384,7 +5384,7 @@ static void SSEOP(paddw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fd
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(paddd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fe
+static void __FASTCALL SSEOP(paddd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fe
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5400,7 +5400,7 @@ static void SSEOP(paddd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fe
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psubusb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d8
+static void __FASTCALL SSEOP(psubusb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d8
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5416,7 +5416,7 @@ static void SSEOP(psubusb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d8
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psubusw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d9
+static void __FASTCALL SSEOP(psubusw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d9
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5432,7 +5432,7 @@ static void SSEOP(psubusw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d9
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pand_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f db
+static void __FASTCALL SSEOP(pand_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f db
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5448,7 +5448,7 @@ static void SSEOP(pand_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f db
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pandn_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f df
+static void __FASTCALL SSEOP(pandn_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f df
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5464,7 +5464,7 @@ static void SSEOP(pandn_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f df
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(paddusb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f dc
+static void __FASTCALL SSEOP(paddusb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f dc
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5480,7 +5480,7 @@ static void SSEOP(paddusb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f dc
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(paddusw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f dd
+static void __FASTCALL SSEOP(paddusw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f dd
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5496,7 +5496,7 @@ static void SSEOP(paddusw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f dd
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmaxub_r128_rm128)(i386_state* cpustate) // Opcode 66 0f de
+static void __FASTCALL SSEOP(pmaxub_r128_rm128)(i386_state* cpustate) // Opcode 66 0f de
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5512,7 +5512,7 @@ static void SSEOP(pmaxub_r128_rm128)(i386_state* cpustate) // Opcode 66 0f de
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmulhuw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e4
+static void __FASTCALL SSEOP(pmulhuw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e4
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5528,7 +5528,7 @@ static void SSEOP(pmulhuw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e4
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmulhw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e5
+static void __FASTCALL SSEOP(pmulhw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e5
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5544,7 +5544,7 @@ static void SSEOP(pmulhw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psubsb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e8
+static void __FASTCALL SSEOP(psubsb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e8
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5560,7 +5560,7 @@ static void SSEOP(psubsb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e8
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psubsw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e9
+static void __FASTCALL SSEOP(psubsw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e9
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5576,7 +5576,7 @@ static void SSEOP(psubsw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e9
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pminsw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f ea
+static void __FASTCALL SSEOP(pminsw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f ea
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5592,7 +5592,7 @@ static void SSEOP(pminsw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f ea
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmaxsw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f ee
+static void __FASTCALL SSEOP(pmaxsw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f ee
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5608,7 +5608,7 @@ static void SSEOP(pmaxsw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f ee
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(paddsb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f ec
+static void __FASTCALL SSEOP(paddsb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f ec
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5624,7 +5624,7 @@ static void SSEOP(paddsb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f ec
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(paddsw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f ed
+static void __FASTCALL SSEOP(paddsw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f ed
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5640,7 +5640,7 @@ static void SSEOP(paddsw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f ed
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(por_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f eb
+static void __FASTCALL SSEOP(por_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f eb
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5656,7 +5656,7 @@ static void SSEOP(por_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f eb
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pxor_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f ef
+static void __FASTCALL SSEOP(pxor_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f ef
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5672,7 +5672,7 @@ static void SSEOP(pxor_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f ef
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pmaddwd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f5
+static void __FASTCALL SSEOP(pmaddwd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f5
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5690,7 +5690,7 @@ static void SSEOP(pmaddwd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psubb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f8
+static void __FASTCALL SSEOP(psubb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f8
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5706,7 +5706,7 @@ static void SSEOP(psubb_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f8
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psubw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f9
+static void __FASTCALL SSEOP(psubw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f9
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5722,7 +5722,7 @@ static void SSEOP(psubw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f9
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psubd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fa
+static void __FASTCALL SSEOP(psubd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fa
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5738,7 +5738,7 @@ static void SSEOP(psubd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f fa
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psadbw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f f6
+static void __FASTCALL SSEOP(psadbw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f f6
 {
        INT32 temp;
        UINT8 modrm = FETCH(cpustate);
@@ -5767,7 +5767,7 @@ static void SSEOP(psadbw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f f6
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pavgb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f e0
+static void __FASTCALL SSEOP(pavgb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f e0
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5783,7 +5783,7 @@ static void SSEOP(pavgb_r128_rm128)(i386_state* cpustate) // Opcode 66 0f e0
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pavgw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f e3
+static void __FASTCALL SSEOP(pavgw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f e3
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5799,7 +5799,7 @@ static void SSEOP(pavgw_r128_rm128)(i386_state* cpustate) // Opcode 66 0f e3
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psrlw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d1
+static void __FASTCALL SSEOP(psrlw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d1
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5817,7 +5817,7 @@ static void SSEOP(psrlw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d1
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psrld_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d2
+static void __FASTCALL SSEOP(psrld_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d2
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5839,7 +5839,7 @@ static void SSEOP(psrld_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d2
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psrlq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d3
+static void __FASTCALL SSEOP(psrlq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d3
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5857,7 +5857,7 @@ static void SSEOP(psrlq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d3
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psllw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f1
+static void __FASTCALL SSEOP(psllw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f1
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5875,7 +5875,7 @@ static void SSEOP(psllw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f1
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(pslld_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f2
+static void __FASTCALL SSEOP(pslld_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f2
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5897,7 +5897,7 @@ static void SSEOP(pslld_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f2
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psllq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f3
+static void __FASTCALL SSEOP(psllq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f3
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5915,7 +5915,7 @@ static void SSEOP(psllq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f f3
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psraw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e1
+static void __FASTCALL SSEOP(psraw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e1
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5933,7 +5933,7 @@ static void SSEOP(psraw_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e1
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(psrad_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e2
+static void __FASTCALL SSEOP(psrad_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e2
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5955,7 +5955,7 @@ static void SSEOP(psrad_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e2
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movntdq_m128_r128)(i386_state* cpustate)  // Opcode 66 0f e7
+static void __FASTCALL SSEOP(movntdq_m128_r128)(i386_state* cpustate)  // Opcode 66 0f e7
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5968,7 +5968,7 @@ static void SSEOP(movntdq_m128_r128)(i386_state* cpustate)  // Opcode 66 0f e7
        }
 }
 
-static void SSEOP(cvttpd2dq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e6
+static void __FASTCALL SSEOP(cvttpd2dq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e6
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5986,7 +5986,7 @@ static void SSEOP(cvttpd2dq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movq_r128m64_r128)(i386_state* cpustate)  // Opcode 66 0f d6
+static void __FASTCALL SSEOP(movq_r128m64_r128)(i386_state* cpustate)  // Opcode 66 0f d6
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -5999,7 +5999,7 @@ static void SSEOP(movq_r128m64_r128)(i386_state* cpustate)  // Opcode 66 0f d6
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(addsubpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d0
+static void __FASTCALL SSEOP(addsubpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d0
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6020,7 +6020,7 @@ static void SSEOP(addsubpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f d0
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(haddpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 7c
+static void __FASTCALL SSEOP(haddpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 7c
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6041,7 +6041,7 @@ static void SSEOP(haddpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 7c
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(hsubpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 7d
+static void __FASTCALL SSEOP(hsubpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 7d
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6062,7 +6062,7 @@ static void SSEOP(hsubpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 7d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(sqrtpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 51
+static void __FASTCALL SSEOP(sqrtpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 51
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6083,7 +6083,7 @@ static void SSEOP(sqrtpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 51
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtpi2pd_r128_rm64)(i386_state* cpustate)  // Opcode 66 0f 2a
+static void __FASTCALL SSEOP(cvtpi2pd_r128_rm64)(i386_state* cpustate)  // Opcode 66 0f 2a
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6100,7 +6100,7 @@ static void SSEOP(cvtpi2pd_r128_rm64)(i386_state* cpustate)  // Opcode 66 0f 2a
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvttpd2pi_r64_rm128)(i386_state* cpustate)  // Opcode 66 0f 2c
+static void __FASTCALL SSEOP(cvttpd2pi_r64_rm128)(i386_state* cpustate)  // Opcode 66 0f 2c
 {
        UINT8 modrm = FETCH(cpustate);
        MMXPROLOG(cpustate);
@@ -6117,7 +6117,7 @@ static void SSEOP(cvttpd2pi_r64_rm128)(i386_state* cpustate)  // Opcode 66 0f 2c
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtpd2pi_r64_rm128)(i386_state* cpustate)  // Opcode 66 0f 2d
+static void __FASTCALL SSEOP(cvtpd2pi_r64_rm128)(i386_state* cpustate)  // Opcode 66 0f 2d
 {
        UINT8 modrm = FETCH(cpustate);
        MMXPROLOG(cpustate);
@@ -6134,7 +6134,7 @@ static void SSEOP(cvtpd2pi_r64_rm128)(i386_state* cpustate)  // Opcode 66 0f 2d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtpd2ps_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5a
+static void __FASTCALL SSEOP(cvtpd2ps_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5a
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6152,7 +6152,7 @@ static void SSEOP(cvtpd2ps_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5a
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtps2dq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5b
+static void __FASTCALL SSEOP(cvtps2dq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5b
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6172,7 +6172,7 @@ static void SSEOP(cvtps2dq_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5b
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(addpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 58
+static void __FASTCALL SSEOP(addpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 58
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6188,7 +6188,7 @@ static void SSEOP(addpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 58
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(mulpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 59
+static void __FASTCALL SSEOP(mulpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 59
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6204,7 +6204,7 @@ static void SSEOP(mulpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 59
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(subpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5c
+static void __FASTCALL SSEOP(subpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5c
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6220,7 +6220,7 @@ static void SSEOP(subpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5c
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(minpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5d
+static void __FASTCALL SSEOP(minpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5d
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6236,7 +6236,7 @@ static void SSEOP(minpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(divpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5e
+static void __FASTCALL SSEOP(divpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5e
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6252,7 +6252,7 @@ static void SSEOP(divpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(maxpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5f
+static void __FASTCALL SSEOP(maxpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5f
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6268,7 +6268,7 @@ static void SSEOP(maxpd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 5f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movntpd_m128_r128)(i386_state* cpustate)  // Opcode 66 0f 2b
+static void __FASTCALL SSEOP(movntpd_m128_r128)(i386_state* cpustate)  // Opcode 66 0f 2b
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6282,7 +6282,7 @@ static void SSEOP(movntpd_m128_r128)(i386_state* cpustate)  // Opcode 66 0f 2b
        }
 }
 
-static void SSEOP(movapd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 28
+static void __FASTCALL SSEOP(movapd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 28
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6294,7 +6294,7 @@ static void SSEOP(movapd_r128_rm128)(i386_state* cpustate)  // Opcode 66 0f 28
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movapd_rm128_r128)(i386_state* cpustate)  // Opcode 66 0f 29
+static void __FASTCALL SSEOP(movapd_rm128_r128)(i386_state* cpustate)  // Opcode 66 0f 29
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6306,7 +6306,7 @@ static void SSEOP(movapd_rm128_r128)(i386_state* cpustate)  // Opcode 66 0f 29
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 10
+static void __FASTCALL SSEOP(movsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 10
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6319,7 +6319,7 @@ static void SSEOP(movsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 10
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movsd_r128m64_r128)(i386_state* cpustate) // Opcode f2 0f 11
+static void __FASTCALL SSEOP(movsd_r128m64_r128)(i386_state* cpustate) // Opcode f2 0f 11
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6331,7 +6331,7 @@ static void SSEOP(movsd_r128m64_r128)(i386_state* cpustate) // Opcode f2 0f 11
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movddup_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 12
+static void __FASTCALL SSEOP(movddup_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 12
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6345,7 +6345,7 @@ static void SSEOP(movddup_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 12
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtsi2sd_r128_rm32)(i386_state* cpustate) // Opcode f2 0f 2a
+static void __FASTCALL SSEOP(cvtsi2sd_r128_rm32)(i386_state* cpustate) // Opcode f2 0f 2a
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6357,7 +6357,7 @@ static void SSEOP(cvtsi2sd_r128_rm32)(i386_state* cpustate) // Opcode f2 0f 2a
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvttsd2si_r32_r128m64)(i386_state* cpustate) // Opcode f2 0f 2c
+static void __FASTCALL SSEOP(cvttsd2si_r32_r128m64)(i386_state* cpustate) // Opcode f2 0f 2c
 {
        INT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -6373,7 +6373,7 @@ static void SSEOP(cvttsd2si_r32_r128m64)(i386_state* cpustate) // Opcode f2 0f 2
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtsd2si_r32_r128m64)(i386_state* cpustate) // Opcode f2 0f 2d
+static void __FASTCALL SSEOP(cvtsd2si_r32_r128m64)(i386_state* cpustate) // Opcode f2 0f 2d
 {
        INT32 src;
        UINT8 modrm = FETCH(cpustate);
@@ -6389,7 +6389,7 @@ static void SSEOP(cvtsd2si_r32_r128m64)(i386_state* cpustate) // Opcode f2 0f 2d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(sqrtsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 51
+static void __FASTCALL SSEOP(sqrtsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 51
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6408,7 +6408,7 @@ static void SSEOP(sqrtsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 51
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(addsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 58
+static void __FASTCALL SSEOP(addsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 58
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6422,7 +6422,7 @@ static void SSEOP(addsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 58
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(mulsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 59
+static void __FASTCALL SSEOP(mulsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 59
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6436,7 +6436,7 @@ static void SSEOP(mulsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 59
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cvtsd2ss_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5a
+static void __FASTCALL SSEOP(cvtsd2ss_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5a
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6450,7 +6450,7 @@ static void SSEOP(cvtsd2ss_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(subsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5c
+static void __FASTCALL SSEOP(subsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5c
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6464,7 +6464,7 @@ static void SSEOP(subsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5c
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(minsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5d
+static void __FASTCALL SSEOP(minsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5d
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6478,7 +6478,7 @@ static void SSEOP(minsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(divsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5e
+static void __FASTCALL SSEOP(divsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5e
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6492,7 +6492,7 @@ static void SSEOP(divsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5e
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(maxsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5f
+static void __FASTCALL SSEOP(maxsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5f
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6506,7 +6506,7 @@ static void SSEOP(maxsd_r128_r128m64)(i386_state* cpustate) // Opcode f2 0f 5f
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(haddps_r128_rm128)(i386_state* cpustate) // Opcode f2 0f 7c
+static void __FASTCALL SSEOP(haddps_r128_rm128)(i386_state* cpustate) // Opcode f2 0f 7c
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6539,7 +6539,7 @@ static void SSEOP(haddps_r128_rm128)(i386_state* cpustate) // Opcode f2 0f 7c
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(hsubps_r128_rm128)(i386_state* cpustate) // Opcode f2 0f 7d
+static void __FASTCALL SSEOP(hsubps_r128_rm128)(i386_state* cpustate) // Opcode f2 0f 7d
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6572,7 +6572,7 @@ static void SSEOP(hsubps_r128_rm128)(i386_state* cpustate) // Opcode f2 0f 7d
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(cmpsd_r128_r128m64_i8)(i386_state* cpustate) // Opcode f2 0f c2
+static void __FASTCALL SSEOP(cmpsd_r128_r128m64_i8)(i386_state* cpustate) // Opcode f2 0f c2
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6593,7 +6593,7 @@ static void SSEOP(cmpsd_r128_r128m64_i8)(i386_state* cpustate) // Opcode f2 0f c
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(addsubps_r128_rm128)(i386_state* cpustate) // Opcode f2 0f d0
+static void __FASTCALL SSEOP(addsubps_r128_rm128)(i386_state* cpustate) // Opcode f2 0f d0
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6613,7 +6613,7 @@ static void SSEOP(addsubps_r128_rm128)(i386_state* cpustate) // Opcode f2 0f d0
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(movdq2q_r64_r128)(i386_state* cpustate) // Opcode f2 0f d6
+static void __FASTCALL SSEOP(movdq2q_r64_r128)(i386_state* cpustate) // Opcode f2 0f d6
 {
        UINT8 modrm = FETCH(cpustate);
        MMXPROLOG(cpustate);
@@ -6626,7 +6626,7 @@ static void SSEOP(movdq2q_r64_r128)(i386_state* cpustate) // Opcode f2 0f d6
        }
 }
 
-static void SSEOP(cvtpd2dq_r128_rm128)(i386_state* cpustate) // Opcode f2 0f e6
+static void __FASTCALL SSEOP(cvtpd2dq_r128_rm128)(i386_state* cpustate) // Opcode f2 0f e6
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
@@ -6644,7 +6644,7 @@ static void SSEOP(cvtpd2dq_r128_rm128)(i386_state* cpustate) // Opcode f2 0f e6
        CYCLES(cpustate,1);     // TODO: correct cycle count
 }
 
-static void SSEOP(lddqu_r128_m128)(i386_state* cpustate) // Opcode f2 0f f0
+static void __FASTCALL SSEOP(lddqu_r128_m128)(i386_state* cpustate) // Opcode f2 0f f0
 {
        UINT8 modrm = FETCH(cpustate);
        if( modrm >= 0xc0 ) {
index e6ffa0c..6234260 100644 (file)
@@ -191,13 +191,13 @@ INLINE void WRITE80(i386_state *cpustate, UINT32 ea, floatx80 t)
  *
  *************************************/
 
-INLINE void x87_set_stack_top(i386_state *cpustate, int top)
+INLINE void __FASTCALL x87_set_stack_top(i386_state *cpustate, int top)
 {
        cpustate->x87_sw &= ~(X87_SW_TOP_MASK << X87_SW_TOP_SHIFT);
        cpustate->x87_sw |= (top << X87_SW_TOP_SHIFT);
 }
 
-INLINE void x87_set_tag(i386_state *cpustate, int reg, int tag)
+INLINE void __FASTCALL x87_set_tag(i386_state *cpustate, int reg, int tag)
 {
        int shift = X87_TW_FIELD_SHIFT(reg);
 
@@ -205,7 +205,7 @@ INLINE void x87_set_tag(i386_state *cpustate, int reg, int tag)
        cpustate->x87_tw |= (tag << shift);
 }
 
-void x87_write_stack(i386_state *cpustate, int i, floatx80 value, int update_tag)
+void __FASTCALL x87_write_stack(i386_state *cpustate, int i, floatx80 value, int update_tag)
 {
        ST(i) = value;
 
@@ -230,13 +230,13 @@ void x87_write_stack(i386_state *cpustate, int i, floatx80 value, int update_tag
        }
 }
 
-INLINE void x87_set_stack_underflow(i386_state *cpustate)
+INLINE void __FASTCALL x87_set_stack_underflow(i386_state *cpustate)
 {
        cpustate->x87_sw &= ~X87_SW_C1;
        cpustate->x87_sw |= X87_SW_IE | X87_SW_SF;
 }
 
-INLINE void x87_set_stack_overflow(i386_state *cpustate)
+INLINE void __FASTCALL x87_set_stack_overflow(i386_state *cpustate)
 {
        cpustate->x87_sw |= X87_SW_C1 | X87_SW_IE | X87_SW_SF;
 }
@@ -329,7 +329,7 @@ int x87_check_exceptions(i386_state *cpustate)
        return 1;
 }
 
-INLINE void x87_write_cw(i386_state *cpustate, UINT16 cw)
+INLINE void __FASTCALL x87_write_cw(i386_state *cpustate, UINT16 cw)
 {
        cpustate->x87_cw = cw;
 
@@ -337,7 +337,7 @@ INLINE void x87_write_cw(i386_state *cpustate, UINT16 cw)
        float_rounding_mode = x87_to_sf_rc[(cpustate->x87_cw >> X87_CW_RC_SHIFT) & X87_CW_RC_MASK];
 }
 
-void x87_reset(i386_state *cpustate)
+void __FASTCALL x87_reset(i386_state *cpustate)
 {
        x87_write_cw(cpustate, 0x0037f);
 
@@ -492,7 +492,7 @@ static floatx80 x87_div(i386_state *cpustate, floatx80 a, floatx80 b)
  *
  *************************************/
 
-void x87_fadd_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fadd_m32real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -527,7 +527,7 @@ void x87_fadd_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fadd_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fadd_m64real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -562,7 +562,7 @@ void x87_fadd_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fadd_st_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fadd_st_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -595,7 +595,7 @@ void x87_fadd_st_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fadd_sti_st(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fadd_sti_st(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -628,7 +628,7 @@ void x87_fadd_sti_st(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_faddp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_faddp(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -664,7 +664,7 @@ void x87_faddp(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fiadd_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fiadd_m32int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -699,7 +699,7 @@ void x87_fiadd_m32int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 19);
 }
 
-void x87_fiadd_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fiadd_m16int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -741,7 +741,7 @@ void x87_fiadd_m16int(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fsub_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsub_m32real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -776,7 +776,7 @@ void x87_fsub_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fsub_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsub_m64real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -811,7 +811,7 @@ void x87_fsub_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fsub_st_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsub_st_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -844,7 +844,7 @@ void x87_fsub_st_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fsub_sti_st(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsub_sti_st(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -877,7 +877,7 @@ void x87_fsub_sti_st(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fsubp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsubp(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -913,7 +913,7 @@ void x87_fsubp(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fisub_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fisub_m32int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -948,7 +948,7 @@ void x87_fisub_m32int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 19);
 }
 
-void x87_fisub_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fisub_m16int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -990,7 +990,7 @@ void x87_fisub_m16int(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fsubr_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsubr_m32real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1025,7 +1025,7 @@ void x87_fsubr_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fsubr_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsubr_m64real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1060,7 +1060,7 @@ void x87_fsubr_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fsubr_st_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsubr_st_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -1093,7 +1093,7 @@ void x87_fsubr_st_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fsubr_sti_st(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsubr_sti_st(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -1126,7 +1126,7 @@ void x87_fsubr_sti_st(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fsubrp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsubrp(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -1162,7 +1162,7 @@ void x87_fsubrp(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fisubr_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fisubr_m32int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1197,7 +1197,7 @@ void x87_fisubr_m32int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 19);
 }
 
-void x87_fisubr_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fisubr_m16int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1239,7 +1239,7 @@ void x87_fisubr_m16int(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fdiv_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdiv_m32real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1274,7 +1274,7 @@ void x87_fdiv_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fdiv_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdiv_m64real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1309,7 +1309,7 @@ void x87_fdiv_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fdiv_st_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdiv_st_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
        floatx80 result;
@@ -1344,7 +1344,7 @@ void x87_fdiv_st_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fdiv_sti_st(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdiv_sti_st(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
        floatx80 result;
@@ -1379,7 +1379,7 @@ void x87_fdiv_sti_st(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fdivp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdivp(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
        floatx80 result;
@@ -1415,7 +1415,7 @@ void x87_fdivp(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fidiv_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fidiv_m32int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1450,7 +1450,7 @@ void x87_fidiv_m32int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fidiv_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fidiv_m16int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1492,7 +1492,7 @@ void x87_fidiv_m16int(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fdivr_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdivr_m32real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1527,7 +1527,7 @@ void x87_fdivr_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fdivr_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdivr_m64real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1562,7 +1562,7 @@ void x87_fdivr_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fdivr_st_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdivr_st_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
        floatx80 result;
@@ -1597,7 +1597,7 @@ void x87_fdivr_st_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fdivr_sti_st(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdivr_sti_st(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
        floatx80 result;
@@ -1632,7 +1632,7 @@ void x87_fdivr_sti_st(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fdivrp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdivrp(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
        floatx80 result;
@@ -1669,7 +1669,7 @@ void x87_fdivrp(i386_state *cpustate, UINT8 modrm)
 }
 
 
-void x87_fidivr_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fidivr_m32int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1704,7 +1704,7 @@ void x87_fidivr_m32int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 73);
 }
 
-void x87_fidivr_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fidivr_m16int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1746,7 +1746,7 @@ void x87_fidivr_m16int(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fmul_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fmul_m32real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1780,7 +1780,7 @@ void x87_fmul_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 11);
 }
 
-void x87_fmul_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fmul_m64real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1814,7 +1814,7 @@ void x87_fmul_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 14);
 }
 
-void x87_fmul_st_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fmul_st_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -1846,7 +1846,7 @@ void x87_fmul_st_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 16);
 }
 
-void x87_fmul_sti_st(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fmul_sti_st(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -1878,7 +1878,7 @@ void x87_fmul_sti_st(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 16);
 }
 
-void x87_fmulp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fmulp(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -1913,7 +1913,7 @@ void x87_fmulp(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 16);
 }
 
-void x87_fimul_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fimul_m32int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1947,7 +1947,7 @@ void x87_fimul_m32int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 22);
 }
 
-void x87_fimul_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fimul_m16int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -1988,7 +1988,7 @@ void x87_fimul_m16int(i386_state *cpustate, UINT8 modrm)
 *
 *************************************/
 
-void x87_fcmovb_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcmovb_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -2012,7 +2012,7 @@ void x87_fcmovb_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcmove_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcmove_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -2036,7 +2036,7 @@ void x87_fcmove_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcmovbe_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcmovbe_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -2060,7 +2060,7 @@ void x87_fcmovbe_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcmovu_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcmovu_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -2084,7 +2084,7 @@ void x87_fcmovu_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcmovnb_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcmovnb_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -2108,7 +2108,7 @@ void x87_fcmovnb_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcmovne_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcmovne_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -2132,7 +2132,7 @@ void x87_fcmovne_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcmovnbe_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcmovnbe_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -2156,7 +2156,7 @@ void x87_fcmovnbe_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcmovnu_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcmovnu_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
        int i = modrm & 7;
@@ -2186,7 +2186,7 @@ void x87_fcmovnu_sti(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 /* D9 F8 */
-void x87_fprem(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fprem(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -2243,7 +2243,7 @@ void x87_fprem(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 84);
 }
 
-void x87_fprem1(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fprem1(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -2269,7 +2269,7 @@ void x87_fprem1(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 94);
 }
 
-void x87_fsqrt(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsqrt(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -2306,7 +2306,7 @@ void x87_fsqrt(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_f2xm1(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_f2xm1(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -2331,7 +2331,7 @@ void x87_f2xm1(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 242);
 }
 
-void x87_fyl2x(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fyl2x(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -2368,7 +2368,7 @@ void x87_fyl2x(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 250);
 }
 
-void x87_fyl2xp1(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fyl2xp1(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -2397,7 +2397,7 @@ void x87_fyl2xp1(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 313);
 }
 /* D9 F2 if 8087   0 < angle < pi/4 */
-void x87_fptan(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fptan(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result1, result2;
 
@@ -2442,7 +2442,7 @@ void x87_fptan(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 244);
 }
 /* D9 F3 */
-void x87_fpatan(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fpatan(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -2467,7 +2467,7 @@ void x87_fpatan(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 289);
 }
 /* D9 FE  387 only */
-void x87_fsin(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsin(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -2500,7 +2500,7 @@ void x87_fsin(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 241);
 }
 /* D9 FF 387 only */
-void x87_fcos(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcos(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -2533,7 +2533,7 @@ void x87_fcos(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 241);
 }
 /* D9 FB  387 only */
-void x87_fsincos(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsincos(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 s_result, c_result;
 
@@ -2588,7 +2588,7 @@ void x87_fsincos(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fld_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fld_m32real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2618,7 +2618,7 @@ void x87_fld_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 3);
 }
 
-void x87_fld_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fld_m64real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2648,7 +2648,7 @@ void x87_fld_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 3);
 }
 
-void x87_fld_m80real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fld_m80real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2669,7 +2669,7 @@ void x87_fld_m80real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 6);
 }
 
-void x87_fld_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fld_sti(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2689,7 +2689,7 @@ void x87_fld_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fild_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fild_m16int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2712,7 +2712,7 @@ void x87_fild_m16int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 13);
 }
 
-void x87_fild_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fild_m32int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2735,7 +2735,7 @@ void x87_fild_m32int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 9);
 }
 
-void x87_fild_m64int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fild_m64int(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2758,7 +2758,7 @@ void x87_fild_m64int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 10);
 }
 
-void x87_fbld(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fbld(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2803,7 +2803,7 @@ void x87_fbld(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fst_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fst_m32real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2828,7 +2828,7 @@ void x87_fst_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 7);
 }
 
-void x87_fst_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fst_m64real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2853,7 +2853,7 @@ void x87_fst_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fst_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fst_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
        floatx80 value;
@@ -2875,7 +2875,7 @@ void x87_fst_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 3);
 }
 
-void x87_fstp_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fstp_m32real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2901,7 +2901,7 @@ void x87_fstp_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 7);
 }
 
-void x87_fstp_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fstp_m64real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2928,7 +2928,7 @@ void x87_fstp_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fstp_m80real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fstp_m80real(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -2953,7 +2953,7 @@ void x87_fstp_m80real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 6);
 }
 
-void x87_fstp_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fstp_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
        floatx80 value;
@@ -2978,7 +2978,7 @@ void x87_fstp_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 3);
 }
 
-void x87_fist_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fist_m16int(i386_state *cpustate, UINT8 modrm)
 {
        INT16 m16int;
 
@@ -3011,7 +3011,7 @@ void x87_fist_m16int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 29);
 }
 
-void x87_fist_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fist_m32int(i386_state *cpustate, UINT8 modrm)
 {
        INT32 m32int;
 
@@ -3044,7 +3044,7 @@ void x87_fist_m32int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 28);
 }
 
-void x87_fistp_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fistp_m16int(i386_state *cpustate, UINT8 modrm)
 {
        INT16 m16int;
 
@@ -3078,7 +3078,7 @@ void x87_fistp_m16int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 29);
 }
 
-void x87_fistp_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fistp_m32int(i386_state *cpustate, UINT8 modrm)
 {
        INT32 m32int;
 
@@ -3112,7 +3112,7 @@ void x87_fistp_m32int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 29);
 }
 
-void x87_fistp_m64int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fistp_m64int(i386_state *cpustate, UINT8 modrm)
 {
        INT64 m64int;
 
@@ -3146,7 +3146,7 @@ void x87_fistp_m64int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 29);
 }
 
-void x87_fbstp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fbstp(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 result;
 
@@ -3188,7 +3188,7 @@ void x87_fbstp(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fld1(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fld1(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
        int tag;
@@ -3214,7 +3214,7 @@ void x87_fld1(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fldl2t(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fldl2t(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
        int tag;
@@ -3246,7 +3246,7 @@ void x87_fldl2t(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fldl2e(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fldl2e(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
        int tag;
@@ -3279,7 +3279,7 @@ void x87_fldl2e(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fldpi(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fldpi(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
        int tag;
@@ -3312,7 +3312,7 @@ void x87_fldpi(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fldlg2(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fldlg2(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
        int tag;
@@ -3345,7 +3345,7 @@ void x87_fldlg2(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fldln2(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fldln2(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
        int tag;
@@ -3378,7 +3378,7 @@ void x87_fldln2(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_fldz(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fldz(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
        int tag;
@@ -3411,12 +3411,12 @@ void x87_fldz(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fnop(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fnop(i386_state *cpustate, UINT8 modrm)
 {
        CYCLES(cpustate, 3);
 }
 
-void x87_fchs(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fchs(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -3439,7 +3439,7 @@ void x87_fchs(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 6);
 }
 
-void x87_fabs(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fabs(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -3462,7 +3462,7 @@ void x87_fabs(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 6);
 }
 
-void x87_fscale(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fscale(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -3503,7 +3503,7 @@ void x87_fscale(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 31);
 }
 
-void x87_frndint(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_frndint(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value;
 
@@ -3525,7 +3525,7 @@ void x87_frndint(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 21);
 }
 
-void x87_fxtract(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fxtract(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 sig80, exp80;
 
@@ -3578,7 +3578,7 @@ void x87_fxtract(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_ftst(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_ftst(i386_state *cpustate, UINT8 modrm)
 {
        if (X87_IS_ST_EMPTY(0))
        {
@@ -3609,7 +3609,7 @@ void x87_ftst(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fxam(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fxam(i386_state *cpustate, UINT8 modrm)
 {
        floatx80 value = ST(0);
 
@@ -3643,7 +3643,7 @@ void x87_fxam(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 8);
 }
 
-void x87_ficom_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_ficom_m16int(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 0, 2);
        if (X87_IS_ST_EMPTY(0))
@@ -3680,7 +3680,7 @@ void x87_ficom_m16int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 16);
 }
 
-void x87_ficom_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_ficom_m32int(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 0, 4);
        if (X87_IS_ST_EMPTY(0))
@@ -3717,7 +3717,7 @@ void x87_ficom_m32int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 15);
 }
 
-void x87_ficomp_m16int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_ficomp_m16int(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 0, 2);
        if (X87_IS_ST_EMPTY(0))
@@ -3755,7 +3755,7 @@ void x87_ficomp_m16int(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 16);
 }
 
-void x87_ficomp_m32int(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_ficomp_m32int(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 0, 4);
        if (X87_IS_ST_EMPTY(0))
@@ -3794,7 +3794,7 @@ void x87_ficomp_m32int(i386_state *cpustate, UINT8 modrm)
 }
 
 
-void x87_fcom_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcom_m32real(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 0, 4);
        if (X87_IS_ST_EMPTY(0))
@@ -3831,7 +3831,7 @@ void x87_fcom_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcom_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcom_m64real(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 0, 8);
        if (X87_IS_ST_EMPTY(0))
@@ -3868,7 +3868,7 @@ void x87_fcom_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcom_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcom_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
 
@@ -3904,7 +3904,7 @@ void x87_fcom_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcomp_m32real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcomp_m32real(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 0, 4);
        if (X87_IS_ST_EMPTY(0))
@@ -3942,7 +3942,7 @@ void x87_fcomp_m32real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcomp_m64real(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcomp_m64real(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 0, 8);
        if (X87_IS_ST_EMPTY(0))
@@ -3980,7 +3980,7 @@ void x87_fcomp_m64real(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcomp_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcomp_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
 
@@ -4017,7 +4017,7 @@ void x87_fcomp_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fcomi_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcomi_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
 
@@ -4061,7 +4061,7 @@ void x87_fcomi_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4); // TODO: correct cycle count
 }
 
-void x87_fcomip_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcomip_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
 
@@ -4106,7 +4106,7 @@ void x87_fcomip_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4); // TODO: correct cycle count
 }
 
-void x87_fucomi_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fucomi_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
 
@@ -4156,7 +4156,7 @@ void x87_fucomi_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4); // TODO: correct cycle count
 }
 
-void x87_fucomip_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fucomip_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
 
@@ -4207,7 +4207,7 @@ void x87_fucomip_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4); // TODO: correct cycle count
 }
 
-void x87_fcompp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fcompp(i386_state *cpustate, UINT8 modrm)
 {
        if (X87_IS_ST_EMPTY(0) || X87_IS_ST_EMPTY(1))
        {
@@ -4252,7 +4252,7 @@ void x87_fcompp(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fucom_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fucom_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
 
@@ -4290,7 +4290,7 @@ void x87_fucom_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fucomp_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fucomp_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
 
@@ -4329,7 +4329,7 @@ void x87_fucomp_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fucompp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fucompp(i386_state *cpustate, UINT8 modrm)
 {
        if (X87_IS_ST_EMPTY(0) || X87_IS_ST_EMPTY(1))
        {
@@ -4376,7 +4376,7 @@ void x87_fucompp(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-void x87_fdecstp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdecstp(i386_state *cpustate, UINT8 modrm)
 {
        cpustate->x87_sw &= ~X87_SW_C1;
 
@@ -4385,7 +4385,7 @@ void x87_fdecstp(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 3);
 }
 
-void x87_fincstp(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fincstp(i386_state *cpustate, UINT8 modrm)
 {
        cpustate->x87_sw &= ~X87_SW_C1;
 
@@ -4394,14 +4394,14 @@ void x87_fincstp(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 3);
 }
 
-void x87_fclex(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fclex(i386_state *cpustate, UINT8 modrm)
 {
        cpustate->x87_sw &= ~0x80ff;
 //     ferr_handler(cpustate, 0);
        CYCLES(cpustate, 7);
 }
 
-void x87_feni(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_feni(i386_state *cpustate, UINT8 modrm)
 {
        cpustate->x87_cw &= ~X87_CW_IEM;
        x87_check_exceptions(cpustate);
@@ -4409,28 +4409,28 @@ void x87_feni(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 5);
 }
 
-void x87_fdisi(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fdisi(i386_state *cpustate, UINT8 modrm)
 {
        cpustate->x87_cw |= X87_CW_IEM;
 
        CYCLES(cpustate, 5);
 }
 
-void x87_ffree(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_ffree(i386_state *cpustate, UINT8 modrm)
 {
        x87_set_tag(cpustate, ST_TO_PHYS(modrm & 7), X87_TW_EMPTY);
 
        CYCLES(cpustate, 3);
 }
 
-void x87_finit(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_finit(i386_state *cpustate, UINT8 modrm)
 {
        x87_reset(cpustate);
 
        CYCLES(cpustate, 17);
 }
 
-void x87_fldcw(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fldcw(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 0, 2);
        UINT16 cw = READ16(cpustate, ea);
@@ -4442,7 +4442,7 @@ void x87_fldcw(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fstcw(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fstcw(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 1, 2);
        WRITE16(cpustate, ea, cpustate->x87_cw);
@@ -4450,7 +4450,7 @@ void x87_fstcw(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 3);
 }
 
-void x87_fldenv(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fldenv(i386_state *cpustate, UINT8 modrm)
 {
        // TODO: Pointers and selectors
        if (cpustate->operand_size)
@@ -4475,7 +4475,7 @@ void x87_fldenv(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate,(cpustate->cr[0] & 1) ? 34 : 44);
 }
 
-void x87_fstenv(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fstenv(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea;
 
@@ -4529,7 +4529,7 @@ void x87_fstenv(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate,(cpustate->cr[0] & 1) ? 56 : 67);
 }
 
-void x87_fsave(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fsave(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 1, 80);
 
@@ -4585,7 +4585,7 @@ void x87_fsave(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate,(cpustate->cr[0] & 1) ? 56 : 67);
 }
 
-void x87_frstor(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_frstor(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 0, 80);
 
@@ -4641,7 +4641,7 @@ void x87_frstor(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate,(cpustate->cr[0] & 1) ? 34 : 44);
 }
 
-void x87_fxch(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fxch(i386_state *cpustate, UINT8 modrm)
 {
        if (X87_IS_ST_EMPTY(0) || X87_IS_ST_EMPTY(1))
                x87_set_stack_underflow(cpustate);
@@ -4661,7 +4661,7 @@ void x87_fxch(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fxch_sti(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fxch_sti(i386_state *cpustate, UINT8 modrm)
 {
        int i = modrm & 7;
 
@@ -4693,14 +4693,14 @@ void x87_fxch_sti(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 4);
 }
 
-void x87_fstsw_ax(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fstsw_ax(i386_state *cpustate, UINT8 modrm)
 {
        REG16(AX) = cpustate->x87_sw;
 
        CYCLES(cpustate, 3);
 }
 
-void x87_fstsw_m2byte(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_fstsw_m2byte(i386_state *cpustate, UINT8 modrm)
 {
        UINT32 ea = GetEA(cpustate, modrm, 1, 2);
 
@@ -4709,7 +4709,7 @@ void x87_fstsw_m2byte(i386_state *cpustate, UINT8 modrm)
        CYCLES(cpustate, 3);
 }
 
-void x87_invalid(i386_state *cpustate, UINT8 modrm)
+void __FASTCALL x87_invalid(i386_state *cpustate, UINT8 modrm)
 {
        // TODO
        report_invalid_opcode(cpustate);
@@ -4724,49 +4724,49 @@ void x87_invalid(i386_state *cpustate, UINT8 modrm)
  *
  *************************************/
 
-static void I386OP(x87_group_d8)(i386_state *cpustate)
+static void __FASTCALL I386OP(x87_group_d8)(i386_state *cpustate)
 {
        UINT8 modrm = FETCH(cpustate);
        cpustate->opcode_table_x87_d8[modrm](cpustate, modrm);
 }
 
-static void I386OP(x87_group_d9)(i386_state *cpustate)
+static void __FASTCALL I386OP(x87_group_d9)(i386_state *cpustate)
 {
        UINT8 modrm = FETCH(cpustate);
        cpustate->opcode_table_x87_d9[modrm](cpustate, modrm);
 }
 
-static void I386OP(x87_group_da)(i386_state *cpustate)
+static void __FASTCALL I386OP(x87_group_da)(i386_state *cpustate)
 {
        UINT8 modrm = FETCH(cpustate);
        cpustate->opcode_table_x87_da[modrm](cpustate, modrm);
 }
 
-static void I386OP(x87_group_db)(i386_state *cpustate)
+static void __FASTCALL I386OP(x87_group_db)(i386_state *cpustate)
 {
        UINT8 modrm = FETCH(cpustate);
        cpustate->opcode_table_x87_db[modrm](cpustate, modrm);
 }
 
-static void I386OP(x87_group_dc)(i386_state *cpustate)
+static void __FASTCALL I386OP(x87_group_dc)(i386_state *cpustate)
 {
        UINT8 modrm = FETCH(cpustate);
        cpustate->opcode_table_x87_dc[modrm](cpustate, modrm);
 }
 
-static void I386OP(x87_group_dd)(i386_state *cpustate)
+static void __FASTCALL I386OP(x87_group_dd)(i386_state *cpustate)
 {
        UINT8 modrm = FETCH(cpustate);
        cpustate->opcode_table_x87_dd[modrm](cpustate, modrm);
 }
 
-static void I386OP(x87_group_de)(i386_state *cpustate)
+static void __FASTCALL I386OP(x87_group_de)(i386_state *cpustate)
 {
        UINT8 modrm = FETCH(cpustate);
        cpustate->opcode_table_x87_de[modrm](cpustate, modrm);
 }
 
-static void I386OP(x87_group_df)(i386_state *cpustate)
+static void __FASTCALL I386OP(x87_group_df)(i386_state *cpustate)
 {
        UINT8 modrm = FETCH(cpustate);
        cpustate->opcode_table_x87_df[modrm](cpustate, modrm);
@@ -4785,7 +4785,7 @@ void build_x87_opcode_table_d8(i386_state *cpustate)
 
        for (modrm = 0; modrm < 0x100; ++modrm)
        {
-               void (*ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
+               void (__FASTCALL *ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
 
                if (modrm < 0xc0)
                {
@@ -4827,7 +4827,7 @@ void build_x87_opcode_table_d9(i386_state *cpustate)
 
        for (modrm = 0; modrm < 0x100; ++modrm)
        {
-               void (*ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
+               void (__FASTCALL *ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
 
                if (modrm < 0xc0)
                {
@@ -4905,7 +4905,7 @@ void build_x87_opcode_table_da(i386_state *cpustate)
 
        for (modrm = 0; modrm < 0x100; ++modrm)
        {
-               void (*ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
+               void (__FASTCALL *ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
 
                if (modrm < 0xc0)
                {
@@ -4944,7 +4944,7 @@ void build_x87_opcode_table_db(i386_state *cpustate)
 
        for (modrm = 0; modrm < 0x100; ++modrm)
        {
-               void (*ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
+               void (__FASTCALL *ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
 
                if (modrm < 0xc0)
                {
@@ -4986,7 +4986,7 @@ void build_x87_opcode_table_dc(i386_state *cpustate)
 
        for (modrm = 0; modrm < 0x100; ++modrm)
        {
-               void (*ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
+               void (__FASTCALL *ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
 
                if (modrm < 0xc0)
                {
@@ -5026,7 +5026,7 @@ void build_x87_opcode_table_dd(i386_state *cpustate)
 
        for (modrm = 0; modrm < 0x100; ++modrm)
        {
-               void (*ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
+               void (__FASTCALL *ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
 
                if (modrm < 0xc0)
                {
@@ -5064,7 +5064,7 @@ void build_x87_opcode_table_de(i386_state *cpustate)
 
        for (modrm = 0; modrm < 0x100; ++modrm)
        {
-               void (*ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
+               void (__FASTCALL *ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
 
                if (modrm < 0xc0)
                {
@@ -5105,7 +5105,7 @@ void build_x87_opcode_table_df(i386_state *cpustate)
 
        for (modrm = 0; modrm < 0x100; ++modrm)
        {
-               void (*ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
+               void (__FASTCALL *ptr)(i386_state *cpustate, UINT8 modrm) = x87_invalid;
 
                if (modrm < 0xc0)
                {