OSDN Git Service

[VM][FMTOWNS][VRAM] More faster VRAM access for packed pixel mode.
authorK.Ohta <whatisthis.sowhat@gmail.com>
Mon, 19 Jul 2021 17:59:02 +0000 (02:59 +0900)
committerK.Ohta <whatisthis.sowhat@gmail.com>
Mon, 19 Jul 2021 17:59:02 +0000 (02:59 +0900)
source/src/vm/fmtowns/vram.cpp
source/src/vm/fmtowns/vram.h

index bd76fff..7d71f95 100644 (file)
@@ -30,7 +30,11 @@ void TOWNS_VRAM::reset()
        for(int i = 0; i < (sizeof(dirty_flag) / sizeof(bool)); i++) {
                dirty_flag[i] = true;
        }
-       packed_pixel_mask_reg.d = 0xffffffff;
+       
+__DECL_VECTORIZED_LOOP                                         
+       for(int i = 0; i < 8; i++) {
+               packed_pixel_mask_reg[i] = 0xff;
+       }
        vram_access_reg_addr = 0;
        sprite_busy = false;
        sprite_disp_page = false;
@@ -60,230 +64,72 @@ void TOWNS_VRAM::make_dirty_vram(uint32_t addr, int bytes)
        
 void TOWNS_VRAM::write_memory_mapped_io8(uint32_t addr, uint32_t data)
 {
-       __DECL_ALIGNED(8) uint8_t mask[4];
-       packed_pixel_mask_reg.write_4bytes_le_to(mask);
-       uint8_t rmask = mask[addr & 3];
+       
        uint32_t naddr = addr & 0x7ffff;
+       uint8_t mask = packed_pixel_mask_reg[naddr & 3];
        uint8_t nd = vram[naddr];
-       data = data & rmask;
-       nd = nd & ~rmask;
-       vram[naddr] = nd | data;
+       uint8_t dd = data;
+       dd = dd & mask;
+       nd = nd & ~mask;
+       vram[naddr] = nd | dd;
 }
 
 void TOWNS_VRAM::write_memory_mapped_io16(uint32_t addr, uint32_t data)
 {
-       uint32_t naddr1 = addr & 0x7ffff;
-       uint32_t naddr2 = (addr + 1) & 0x7ffff;;
-       pair16_t nd, md;
-       pair16_t xmask;
-       switch(naddr1 & 3) {
-       case 0:
-               xmask.w =  packed_pixel_mask_reg.w.l;
-               break;
-       case 1:
-               xmask.b.l =  packed_pixel_mask_reg.b.h;
-               xmask.b.h =  packed_pixel_mask_reg.b.h2;
-               break;
-       case 2:
-               xmask.w =  packed_pixel_mask_reg.w.h;
-               break;
-       case 3:
-               xmask.b.l =  packed_pixel_mask_reg.b.h3;
-               xmask.b.h =  packed_pixel_mask_reg.b.l;
-//             if((addr == 0x8003ffff) || (addr == 0x8007ffff)) { // Wrap1
-//                     naddr2 = (addr == 0x8003ffff) ? 0x00000 : 0x40000;
-//             } else if(addr == 0x8017ffff) { // wrap 2
-//                     naddr2 = 0x00000;
-//             }
-               break;
-       }
+       uint32_t naddr = addr & 0x7ffff;
+       uint32_t maddr = addr & 3;
+       pair16_t dmask;
+       pair16_t xdata;
+       pair16_t ydata;
 
-       if(naddr1 == 0x7ffff) { // Wrap
-               nd.w = (uint16_t)data;
-               md.b.l = vram[naddr1];
-               nd.w = nd.w & xmask.w;
-               md.w = md.w & ~(xmask.w);
-               md.w = md.w | nd.w;
-               vram[naddr1] = md.b.l;
-       } else {
-               md.read_2bytes_le_from(&(vram[naddr1]));
-               
-               nd.w = (uint16_t)data;
-               
-               nd.w = nd.w & xmask.w;
-               md.w = md.w & ~(xmask.w);
-               md.w = md.w | nd.w;
-               
-               md.write_2bytes_le_to(&(vram[naddr1]));
-       }
+       dmask.b.l = packed_pixel_mask_reg[maddr + 0];
+       dmask.b.h = packed_pixel_mask_reg[maddr + 1];
+       
+       xdata.b.l = vram[naddr + 0];
+       xdata.b.h = vram[naddr + 1];
+
+       ydata.w = data;
 
+       xdata.w = xdata.w & ~(dmask.w);
+       ydata.w = ydata.w & dmask.w;
+
+       xdata.w = xdata.w | ydata.w;
+
+       vram[naddr + 0] = xdata.b.l;
+       vram[naddr + 1] = xdata.b.h;
+       
        return;
 }
        
 void TOWNS_VRAM::write_memory_mapped_io32(uint32_t addr, uint32_t data)
 {
 
-       __DECL_ALIGNED(8) uint8_t mask[4];
+       __DECL_ALIGNED(8) uint8_t mask[8];
        uint32_t naddr = addr & 0x7ffff;
-       pair32_t nd, md;
-       pair32_t xmask;
-       
-       nd.d = data;
-       switch(naddr & 3) {
-       case 0:
-               xmask.d =  packed_pixel_mask_reg.d;
-               break;
-       case 1:
-               xmask.b.l  =  packed_pixel_mask_reg.b.h;
-               xmask.b.h  =  packed_pixel_mask_reg.b.h2;
-               xmask.b.h2 =  packed_pixel_mask_reg.b.h3;
-               xmask.b.h3 =  packed_pixel_mask_reg.b.l;
-               break;
-       case 2:
-               xmask.b.l  =  packed_pixel_mask_reg.b.h2;
-               xmask.b.h  =  packed_pixel_mask_reg.b.h3;
-               xmask.b.h2 =  packed_pixel_mask_reg.b.l;
-               xmask.b.h3 =  packed_pixel_mask_reg.b.h;
-               break;
-       case 3:
-               xmask.b.l  =  packed_pixel_mask_reg.b.h3;
-               xmask.b.h  =  packed_pixel_mask_reg.b.l;
-               xmask.b.h2 =  packed_pixel_mask_reg.b.h;
-               xmask.b.h3 =  packed_pixel_mask_reg.b.h2;
-               break;
-       }
+       uint32_t maddr = addr & 3;
+       pair32_t dmask;
+       pair32_t xdata;
+       pair32_t ydata;
+
+       dmask.b.l  = packed_pixel_mask_reg[maddr + 0];
+       dmask.b.h  = packed_pixel_mask_reg[maddr + 1];
+       dmask.b.h2 = packed_pixel_mask_reg[maddr + 2];
+       dmask.b.h3 = packed_pixel_mask_reg[maddr + 3];
+
+       ydata.d = data;
+       xdata.b.l = vram[naddr + 0];
+       xdata.b.h = vram[naddr + 1];
+       xdata.b.h2= vram[naddr + 2];
+       xdata.b.h3= vram[naddr + 3];
+
+       xdata.d = xdata.d & ~(dmask.d);
+       ydata.d = ydata.d & dmask.d;
+       xdata.d = xdata.d | ydata.d;
        
-       if((addr & 3) != 0) {
-               if(naddr <= 0x7fffc) {
-                       md.read_4bytes_le_from(&(vram[naddr]));
-                       nd.d = nd.d & xmask.d;
-                       md.d = md.d & ~(xmask.d);
-                       md.d = md.d | nd.d;
-                       md.write_4bytes_le_to(&(vram[naddr]));
-               } else {
-                       switch(naddr & 3) {
-                       case 1: // 7fffd
-                               md.b.l  = vram[naddr];
-                               md.b.h  = vram[naddr + 1];
-                               md.b.h2 = vram[naddr + 2];
-                               break;
-                       case 2: // 7fffe
-                               md.b.l  = vram[naddr];
-                               md.b.h  = vram[naddr + 1];
-                               break;
-                       case 3: // 7fffe
-                               md.b.l  = vram[naddr];
-                               break;
-                       }
-                       nd.d = nd.d & xmask.d;
-                       md.d = md.d & ~(xmask.d);
-                       md.d = md.d | nd.d;
-                       switch(naddr & 3) {
-                       case 1: // 7fffd
-                               vram[naddr] = md.b.l;
-                               vram[naddr + 1] = md.b.h;
-                               vram[naddr + 2] = md.b.h2;
-                               break;
-                       case 2: // 7fffe
-                               vram[naddr] = md.b.l;
-                               vram[naddr + 1] = md.b.h;
-                               break;
-                       case 3: // 7fffe
-                               vram[naddr] = md.b.l;
-                               break;
-                       }       
-               }
-//             uint32_t xaddr = addr & 0x0017ffff;
-//             uint32_t naddr2 = naddr + 1;
-//             uint32_t naddr3 = naddr + 2;
-//             uint32_t naddr4 = naddr + 3;
-//             uint32_t offmask;
-//             if((addr & 0x3ffff) < 0x3fffd) {
-//                     // Maybe not wrapped.
-//                     md.read_4bytes_le_from(&(vram[naddr]));
-//                     
-//                     nd.d = nd.d & xmask.d;
-//                     md.d = md.d & ~(xmask.d);
-//                     md.d = md.d | nd.d;
-//                     
-//                     md.write_4bytes_le_to(&(vram[naddr]));
-//             } else {
-                       // Maybe not wrapped.
-//                     switch(xaddr) {
-//                     case 0x17fffd:
-//                     case 0x17fffe:
-//                     case 0x17ffff:
-//                             naddr2 = naddr2 & 0x7ffff;
-//                             naddr3 = naddr3 & 0x7ffff;
-//                             naddr4 = naddr4 & 0x7ffff;
-///                            
-//                             md.b.l   = vram[naddr];
-//                             md.b.h   = vram[naddr2];
-//                             md.b.h2  = vram[naddr3];
-//                             md.b.h3  = vram[naddr4];
-                               
-//                             nd.d = nd.d & xmask.d;
-//                             md.d = md.d & ~(xmask.d);
-//                             md.d = md.d | nd.d;
-                       
-//                             vram[naddr] = md.b.l;
-//                             vram[naddr2] = md.b.h;
-//                             vram[naddr3] = md.b.h2;
-//                             vram[naddr4] = md.b.h3;
-//                             break;
-//                     case 0x03fffd:
-//                     case 0x03fffe:
-//                     case 0x03ffff:
-//                     case 0x07fffd:
-//                     case 0x07fffe:
-//                     case 0x07ffff:
-//                             offmask = ((addr & 0x00040000) != 0) ? 0x40000 : 0x00000;
-//                             naddr2 = (naddr2 & 0x3ffff) + offmask;
-//                             naddr3 = (naddr3 & 0x3ffff) + offmask;
-//                             naddr4 = (naddr4 & 0x3ffff) + offmask;
-//                     
-//                             md.b.l   = vram[naddr];
-//                             md.b.h   = vram[naddr2];
-//                             md.b.h2  = vram[naddr3];
-//                             md.b.h3  = vram[naddr4];
-
-//                             nd.d = nd.d & xmask.d;
-//                             md.d = md.d & ~(xmask.d);
-//                             md.d = md.d | nd.d;
-                       
-//                             vram[naddr]  = md.b.l;
-//                             vram[naddr2] = md.b.h;
-//                             vram[naddr3] = md.b.h2;
-//                             vram[naddr4] = md.b.h3;
-//                             break;
-//                     default:
-//                             md.read_4bytes_le_from(&(vram[naddr]));
-//                     
-//                             nd.d = nd.d & xmask.d;
-//                             md.d = md.d & ~(xmask.d);
-//                             md.d = md.d | nd.d;
-//                     
-//                             md.write_4bytes_le_to(&(vram[naddr]));
-//                             break;
-//                     }
-//             }
-       } else {
-               // Aligned
-#ifdef __LITTLE_ENDIAN__
-               md.d = *((uint32_t*)(&(vram[naddr])));
-#else                  
-               md.read_4bytes_le_from(&(vram[naddr]));
-#endif         
-               nd.d = nd.d & xmask.d;
-               md.d = md.d & (~(xmask.d));
-               md.d = md.d | nd.d;
-               
-#ifdef __LITTLE_ENDIAN__
-               *((uint32_t*)(&(vram[naddr]))) = md.d;
-#else
-               md.write_4bytes_le_to(&(vram[naddr]));
-#endif
-       }
+       vram[naddr + 0] = xdata.b.l;
+       vram[naddr + 1] = xdata.b.h;
+       vram[naddr + 2] = xdata.b.h2;
+       vram[naddr + 3] = xdata.b.h3;
        return;
 }
 
@@ -294,118 +140,22 @@ uint32_t TOWNS_VRAM::read_memory_mapped_io8(uint32_t addr)
 
 uint32_t TOWNS_VRAM::read_memory_mapped_io16(uint32_t addr)
 {
-       pair16_t a;
-
        uint32_t naddr = addr & 0x7ffff;
-       if((addr & 1) == 0) { // Aligned
-               a.read_2bytes_le_from(&(vram[naddr]));
-       } else {
-               /*
-               switch(addr) {
-               case 0x8003ffff:
-                       a.b.l = vram[0x3ffff];
-                       a.b.h = vram[0x00000];
-                       break;
-               case 0x8007ffff:
-                       a.b.l = vram[0x7ffff];
-                       a.b.h = vram[0x40000];
-                       break;
-               case 0x8017ffff:
-                       a.b.l = vram[0x7ffff];
-                       a.b.h = vram[0x00000];
-                       break;
-               default:
-                       a.read_2bytes_le_from(&(vram[naddr]));
-                       break;
-               }
-               */
-               if(naddr == 0x7ffff) {
-                       a.b.l = vram[naddr];
-                       a.b.h = 0xff;
-               } else {
-                       a.b.l = vram[naddr];
-                       a.b.h = vram[naddr + 1];
-               }
-       }
-       return (uint32_t)(a.w);
+       pair16_t data;
+       data.b.l = vram[naddr + 0];
+       data.b.h = vram[naddr + 1];
+       return (uint32_t)(data.w);
 }
 
 uint32_t TOWNS_VRAM::read_memory_mapped_io32(uint32_t addr)
 {
-       pair32_t a;
-
        uint32_t naddr = addr & 0x7ffff;
-       if((addr & 3) == 0) { // Aligned
-#ifdef __LITTLE_ENDIAN__
-               a.d = *((uint32_t*)(&(vram[naddr])));
-#else
-               a.read_4bytes_le_from(&(vram[naddr]));
-#endif
-       } else { // Unaligned
-               if((addr & 0x7ffff) < 0x7fffd) {
-                       // Maybe not wrapped.
-                       a.read_4bytes_le_from(&(vram[naddr]));
-               } else  {
-                       a.d = 0xffffffff;
-                       switch(naddr & 3) {
-                       case 1:
-                               a.b.l  = vram[naddr];
-                               a.b.h  = vram[naddr + 1];
-                               a.b.h2 = vram[naddr + 2];
-                               break;
-                       case 2:
-                               a.b.l  = vram[naddr];
-                               a.b.h  = vram[naddr + 1];
-                               break;
-                       case 3:
-                               a.b.l  = vram[naddr];
-                               break;
-                               
-                       }
-                       /*
-                       uint32_t xaddr = addr & 0x0017ffff;
-                       uint32_t naddr2 = naddr + 1;
-                       uint32_t naddr3 = naddr + 2;
-                       uint32_t naddr4 = naddr + 3;
-                       uint32_t offmask;
-                       switch(xaddr) {
-                       case 0x17fffd:
-                       case 0x17fffe:
-                       case 0x17ffff:
-                               naddr2 = naddr2 & 0x7ffff;
-                               naddr3 = naddr3 & 0x7ffff;
-                               naddr4 = naddr4 & 0x7ffff;
-                               
-                               a.b.l   = vram[naddr];
-                               a.b.h   = vram[naddr2];
-                               a.b.h2  = vram[naddr3];
-                               a.b.h3  = vram[naddr4];
-                               break;
-                       case 0x03fffd:
-                       case 0x03fffe:
-                       case 0x03ffff:
-                       case 0x07fffd:
-                       case 0x07fffe:
-                       case 0x07ffff:
-                               offmask = ((addr & 0x00040000) != 0) ? 0x40000 : 0x00000;
-                               naddr2 = (naddr2 & 0x3ffff) + offmask;
-                               naddr3 = (naddr3 & 0x3ffff) + offmask;
-                               naddr4 = (naddr4 & 0x3ffff) + offmask;
-                       
-                               a.b.l   = vram[naddr];
-                               a.b.h   = vram[naddr2];
-                               a.b.h2  = vram[naddr3];
-                               a.b.h3  = vram[naddr4];
-                               break;
-                       default:
-                       // Maybe not wrapped.
-                               a.read_4bytes_le_from(&(vram[naddr]));
-                               break;
-                       }
-                       */
-               }
-       }
-       return a.d;
+       pair32_t data;
+       data.b.l  = vram[naddr + 0];
+       data.b.h  = vram[naddr + 1];
+       data.b.h2 = vram[naddr + 2];
+       data.b.h3 = vram[naddr + 3];
+       return data.d;
 }
 
 void TOWNS_VRAM::write_signal(int id, uint32_t data, uint32_t mask)
@@ -422,25 +172,13 @@ void TOWNS_VRAM::write_io8(uint32_t address,  uint32_t data)
 //             out_debug_log(_T("VRAM ACCESS(0458h)=%02X"), data);
                break;
        case 0x045a:
-               switch(vram_access_reg_addr) {
-               case 0:
-                       packed_pixel_mask_reg.b.l = data;
-                       break;
-               case 1:
-                       packed_pixel_mask_reg.b.h2 = data;
-                       break;
-               }                       
+               packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 0] = data;
+               packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 4] = data;
 //             out_debug_log(_T("VRAM MASK(045Ah)=%08X"), packed_pixel_mask_reg.d);
                break;
        case 0x045b:
-               switch(vram_access_reg_addr) {
-               case 0:
-                       packed_pixel_mask_reg.b.h = data;
-                       break;
-               case 1:
-                       packed_pixel_mask_reg.b.h3 = data;
-                       break;
-               }                       
+               packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 1] = data;
+               packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 5] = data;
 //             out_debug_log(_T("VRAM MASK(045Bh)=%08X"), packed_pixel_mask_reg.d);
                break;
        case 0x05ee:
@@ -457,18 +195,16 @@ void TOWNS_VRAM::write_io16(uint32_t address,  uint32_t data)
        switch(address & 0xffff) {
        case 0x0458:
                vram_access_reg_addr = data & 3;
-//             out_debug_log(_T("VRAM ACCESS(0458h)=%02X"), data);
                break;
        case 0x045a:
-               switch(vram_access_reg_addr) {
-               case 0:
-                       packed_pixel_mask_reg.w.l = d.w.l;
-                       break;
-               case 1:
-                       packed_pixel_mask_reg.w.h = d.w.l;
-                       break;
-               }                       
-//             out_debug_log(_T("VRAM MASK(045Ah)=%08X"), packed_pixel_mask_reg.d);
+               {
+                       pair16_t w;
+                       w.w = data;
+                       packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 0] = w.b.l;
+                       packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 1] = w.b.h;
+                       packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 4] = w.b.l;
+                       packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 5] = w.b.h;
+               }
                break;
        case 0x5ee:
                {
@@ -487,24 +223,16 @@ uint32_t TOWNS_VRAM::read_io8(uint32_t address)
                return vram_access_reg_addr;
                break;
        case 0x045a:
-               switch(vram_access_reg_addr) {
-               case 0:
-                       return packed_pixel_mask_reg.b.l;
-                       break;
-               case 1:
-                       return packed_pixel_mask_reg.b.h2;
-                       break;
-               }                       
+               {
+                       uint8_t v = packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 0]; 
+                       return (uint32_t)v;
+               }
                break;
        case 0x045b:
-               switch(vram_access_reg_addr) {
-               case 0:
-                       return packed_pixel_mask_reg.b.h;
-                       break;
-               case 1:
-                       return packed_pixel_mask_reg.b.h3;
-                       break;
-               }                       
+               {
+                       uint8_t v = packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 1]; 
+                       return (uint32_t)v;
+               }
                break;
        case 0x5ee:
                // ToDo: Implement around VRAM cache.
@@ -527,14 +255,12 @@ uint32_t TOWNS_VRAM::read_io16(uint32_t address)
                return vram_access_reg_addr;
                break;
        case 0x045a:
-               switch(vram_access_reg_addr) {
-               case 0:
-                       return packed_pixel_mask_reg.w.l;
-                       break;
-               case 1:
-                       return packed_pixel_mask_reg.w.h;
-                       break;
-               }                       
+               {
+                       pair16_t w;
+                       w.b.l = packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 0]; 
+                       w.b.h = packed_pixel_mask_reg[(vram_access_reg_addr << 1) + 1]; 
+                       return (uint32_t)(w.w);
+               }
                break;
        case 0x05ee:
                {
@@ -548,7 +274,7 @@ uint32_t TOWNS_VRAM::read_io16(uint32_t address)
        return 0xffff;
 }
 
-#define STATE_VERSION  1
+#define STATE_VERSION  2
 
 bool TOWNS_VRAM::process_state(FILEIO* state_fio, bool loading)
 {
@@ -568,8 +294,8 @@ bool TOWNS_VRAM::process_state(FILEIO* state_fio, bool loading)
        state_fio->StateValue(sprite_disp_page);
 
        state_fio->StateValue(vram_access_reg_addr);
-       state_fio->StateValue(packed_pixel_mask_reg);
-
+       
+       state_fio->StateArray(packed_pixel_mask_reg, sizeof(packed_pixel_mask_reg), 1);
        state_fio->StateArray(vram, sizeof(vram), 1);
        
        return true;
index 8c56e6c..2af3c9b 100644 (file)
@@ -75,8 +75,8 @@ protected:
        // I/O 045AH (RW) : VRAM ACCESS CONTROLLER reg data (LOW).
        // I/O 045BH (RW) : VRAM ACCESS CONTROLLER reg data (HIGH).
        uint8_t vram_access_reg_addr;
-       pair32_t packed_pixel_mask_reg; // '1' = Write. I/O 0458H - 045BH.
-       uint8_t vram[0x80000]; // Related by machine.
+       __DECL_ALIGNED(16) uint8_t packed_pixel_mask_reg[8]; // '1' = Write. I/O 0458H - 045BH.
+       __DECL_ALIGNED(16) uint8_t vram[0x80000 + 8]; // Related by machine.
        // End.
 
        // Flags related by host renderer. Not saved.