OSDN Git Service

[COMMON] Fix unaligned SIMD variables. Fix crash when built with "-msse2" on Win32.
[csp-qt/common_source_project-fm7.git] / source / src / vm / fm7 / vram.cpp
index e705461..95e98cc 100644 (file)
@@ -8,55 +8,13 @@
 #include "vm.h"
 #include "emu.h"
 #include "fm7_display.h"
-#if defined(_OPENMP)
-#include <omp.h>
-#endif
-
-uint8_t DISPLAY::read_vram_l4_400l(uint32_t addr, uint32_t offset)
-{
 #if defined(_FM77L4)
-       if(addr < 0x8000) {
-               if(workram) {
-                       uint32_t raddr = addr & 0x3fff;
-                       if((multimode_accessmask & 0x04) == 0) {
-                               return gvram[0x8000 + (raddr + offset) & 0x7fff];
-                       }
-                       return 0xff;
-               }
-               pagemod = addr & 0x4000;
-               return gvram[((addr + offset) & mask) | pagemod];
-       } else if(addr < 0x9800) {
-               return textvram[addr & 0x0fff];
-       } else { // $9800-$bfff
-               return subrom_l4[addr - 0x9800];
-       }
+#include "../hd46505.h"
 #endif
-       return 0xff;
-}
-
-void DISPLAY::write_vram_l4_400l(uint32_t addr, uint32_t offset, uint32_t data)
-{
-#if defined(_FM77L4)
-       if(addr < 0x8000) {
-               if(workram) {
-                       uint32_t raddr = addr & 0x3fff;
-                       if((multimode_accessmask & 0x04) == 0) {
-                               gvram[0x8000 + (raddr + offset) & 0x7fff] = (uint8_t)data;
-                       }
-                       return;
-               }
-               pagemod = addr & 0x4000;
-               gvram[((addr + offset) & mask) | pagemod] = (uint8_t)data;
-       } else if(addr < 0x9800) {
-               textvram[addr & 0x0fff] = (uint8_t)data;
-       } else { // $9800-$bfff
-               //return subrom_l4[addr - 0x9800];
-       }
-       return;
-#endif 
-}
 
+extern config_t config;
 
+namespace FM7 {
 
 void DISPLAY::draw_screen()
 {
@@ -65,7 +23,6 @@ void DISPLAY::draw_screen()
 //#endif       
 }
 
-extern config_t config;
 void DISPLAY::draw_screen2()
 {
        int y;
@@ -155,42 +112,37 @@ void DISPLAY::draw_screen2()
        if(!(vram_wrote_shadow | ff)) return;
        vram_wrote_shadow = false;
        if(display_mode == DISPLAY_MODE_8_200L) {
+               _render_command_data_t cmd;
+               uint32_t yoff_d = 0;
                int ii;
                yoff = 0;
-#ifdef USE_GREEN_DISPLAY
-               if((config.dipswitch & FM7_DIPSW_GREEN_DISPLAY) != 0) {
-                       // Green display had only connected to FM-8, FM-7/NEW7 and FM-77.
-                       for(y = 0; y < 200; y += 8) {
-                               for(yy = 0; yy < 8; yy++) {
-                                       if(!(vram_draw_table[y + yy] | ff)) continue;
-                                       vram_draw_table[y + yy] = false;
-#if !defined(FIXED_FRAMEBUFFER_SIZE)
-                                       p = emu->get_screen_buffer(y + yy);
-                                       p2 = NULL;
+#if defined(USE_GREEN_DISPLAY)
+               if(use_green_monitor) {
+                       cmd.palette = dpalette_pixel_green;
+               } else {
+                       cmd.palette = dpalette_pixel;
+               }
 #else
-                                       p = emu->get_screen_buffer((y + yy) * 2);
-                                       p2 = emu->get_screen_buffer((y + yy) * 2 + 1);
-#endif
-                                       if(p == NULL) continue;
-                                       yoff = (y + yy) * 80;
-                                       {
-                                               for(x = 0; x < 10; x++) {
-                                                       for(ii = 0; ii < 8; ii++) {
-                                                               GETVRAM_8_200L_GREEN(yoff + ii, p, p2, false, scan_line);
-#if defined(FIXED_FRAMEBUFFER_SIZE)
-                                                               p2 += 8;
-#endif
-                                                               p += 8;
-                                                       }
-                                                       yoff += 8;
-                                               }
-                                       }
-                               }
-                       }
-                       if(ff) force_update = false;
-                       return;
+               cmd.palette = dpalette_pixel;
+#endif                         
+               for(int i = 0; i < 3; i++) {
+                       cmd.data[i] = gvram_shadow;
+                       cmd.baseaddress[i] = i * 0x4000;
+                       cmd.voffset[i] = yoff;
+                       cmd.is_render[i] = false;
                }
-#endif
+               if(!multimode_dispflags[0]) cmd.is_render[0] = true;
+               if(!multimode_dispflags[1]) cmd.is_render[1] = true;
+               if(!multimode_dispflags[2]) cmd.is_render[2] = true;
+               cmd.bit_trans_table[0] = (_bit_trans_table_t*)(&(bit_trans_table_2[0][0])); // B
+               cmd.bit_trans_table[1] = (_bit_trans_table_t*)(&(bit_trans_table_1[0][0])); // R
+               cmd.bit_trans_table[2] = (_bit_trans_table_t*)(&(bit_trans_table_0[0][0])); // G
+               cmd.xzoom = 1;
+               cmd.addrmask = 0x3fff;
+               cmd.addrmask2 = 0x3fff;
+               cmd.begin_pos = 0;
+               cmd.shift = 5;
+               cmd.render_width = 80;
                for(y = 0; y < 200; y += 8) {
                        for(yy = 0; yy < 8; yy++) {
                        
@@ -205,40 +157,263 @@ void DISPLAY::draw_screen2()
 #endif
                                if(p == NULL) continue;
                                yoff = (y + yy) * 80;
+                               for(int i = 0; i < 3; i++) {
+                                       cmd.voffset[i] = yoff;
+                               }
+                               
 # if defined(_FM77AV40EX) || defined(_FM77AV40SX)
+                               int dpage;
+                               dpage = vram_display_block;
+                               bool window_inv = false;
                                if(window_opened && (wy_low <= (y + yy)) && (wy_high > (y + yy))) {
-                                       for(x = 0; x < 80; x++) {
-                                               if((x >= wx_begin) && (x < wx_end)) {
-                                                       GETVRAM_8_200L(yoff, p, p2, true, scan_line);
-                                               } else {
-                                                       GETVRAM_8_200L(yoff, p, p2, false, scan_line);
+                                       if((wx_begin > 0) && (wx_begin < wx_end) && (wx_begin < 80)) {
+                                               // Window : left
+                                               cmd.begin_pos = 0;
+                                               window_inv = false;
+                                               int _wend = wx_end;
+                                               if(_wend >= 80) _wend = 80;
+                                               cmd.render_width = wx_begin;
+                                               yoff_d = (dpage != 0) ? 0x18000 : 0x00000;
+#if defined(_FM77AV_VARIANTS)
+                                               if(display_page_bak == 1) yoff_d += 0xc000;
+#endif
+                                               for(int i = 0; i < 3; i++) {
+                                                       cmd.baseaddress[i] = yoff_d + (i * 0x4000);
+                                               }
+                                               if(cmd.render_width > 0) {
+                                                       if(cmd.render_width > 80) cmd.render_width = 80;
+                                               }
+                                               Render8Colors_Line(&cmd, p, p2, scan_line);
+
+                                               // Center
+                                               cmd.begin_pos = wx_begin;
+                                               cmd.render_width = _wend - wx_begin;
+                                               yoff_d = (dpage != 0) ? 0x00000 : 0x18000;
+#if defined(_FM77AV_VARIANTS)
+                                               if(display_page_bak == 1) yoff_d += 0xc000;
+#endif
+                                               for(int i = 0; i < 3; i++) {
+                                                       cmd.baseaddress[i] = yoff_d + (i * 0x4000);
+                                               }
+                                               if(cmd.render_width > 0) {
+                                                       if(cmd.render_width > 80) cmd.render_width = 80;
+                                               }
+                                               Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), &(p2[cmd.begin_pos * 8]) , scan_line);
+                                               // Right
+                                               if(wx_end < 80) {
+                                                       cmd.begin_pos = wx_end;
+                                                       cmd.render_width = 80 - wx_end;
+                                                       yoff_d = (dpage != 0) ? 0x18000 : 0x00000;
+#if defined(_FM77AV_VARIANTS)
+                                                       if(display_page_bak == 1) yoff_d += 0xc000;
+#endif
+                                                       for(int i = 0; i < 3; i++) {
+                                                               cmd.baseaddress[i] = yoff_d + (i * 0x4000);
+                                                       }
+                                                       if(cmd.render_width > 0) {
+                                                               if(cmd.render_width > 80) cmd.render_width = 80;
+                                                       }
+                                                       Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), &(p2[cmd.begin_pos * 8]), scan_line);
                                                }
 #if defined(FIXED_FRAMEBUFFER_SIZE)
-                                               p2 += 8;
+                                               //CopyDrawnData(p, p2, 80, scan_line);
 #endif
-                                               p += 8;
-                                               yoff++;
+                                               continue;
+                                       } else if((wx_begin <= 0) && (wx_begin < wx_end) && (wx_end >= 0)) {
+                                               // Left
+                                               cmd.begin_pos = 0;
+                                               cmd.render_width = wx_end;
+                                               yoff_d = (dpage != 0) ? 0x00000 : 0x18000;
+#if defined(_FM77AV_VARIANTS)
+                                               if(display_page_bak == 1) yoff_d += 0xc000;
+#endif
+                                               for(int i = 0; i < 3; i++) {
+                                                       cmd.baseaddress[i] = yoff_d + (i * 0x4000);
+                                               }
+                                               if(cmd.render_width > 0) {
+                                                       if(cmd.render_width > 80) cmd.render_width = 80;
+                                               }
+                                               if(cmd.render_width > 0) Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), &(p2[cmd.begin_pos * 8]), scan_line);
+                                               // Right
+                                               if(wx_end < 80) {
+                                                       cmd.begin_pos = wx_end;
+                                                       cmd.render_width = 80 - wx_end;
+                                                       yoff_d = (dpage != 0) ? 0x18000 : 0x00000;
+#if defined(_FM77AV_VARIANTS)
+                                                       if(display_page_bak == 1) yoff_d += 0xc000;
+#endif
+                                                       for(int i = 0; i < 3; i++) {
+                                                               cmd.baseaddress[i] = yoff_d + (i * 0x4000);
+                                                       }
+                                                       if(cmd.render_width > 0) {
+                                                               if(cmd.render_width > 80) cmd.render_width = 80;
+                                                       }
+                                                       Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), &(p2[cmd.begin_pos * 8]), scan_line);
+                                               }
+#if defined(FIXED_FRAMEBUFFER_SIZE)
+//                                             CopyDrawnData(p, p2, 80, scan_line);
+#endif
+                                               continue;
                                        }
-                               } else
-# endif
-                               {
-                                       for(x = 0; x < 10; x++) {
-                                               for(ii = 0; ii < 8; ii++) {
-                                                       GETVRAM_8_200L(yoff + ii, p, p2, false, scan_line);
+                               }
+#endif
+                               //cmd.begin_pos = 0;
+                               //cmd.render_width = 80;
+# if defined(_FM77AV40EX) || defined(_FM77AV40SX)
+                               yoff_d = (dpage != 0) ? 0x18000 : 0x00000;
+#else
+//                             yoff_d = 0;
+#endif
+#if defined(_FM77AV_VARIANTS)
+                               if(display_page_bak == 1) yoff_d += 0xc000;
+                               for(int i = 0; i < 3; i++) {
+                                       cmd.baseaddress[i] = yoff_d + (i * 0x4000);
+                               }
+#else
+//                             for(int i = 0; i < 3; i++) {
+//                                     cmd.baseaddress[i] = i * 0x4000;
+//                             }
+#endif
+                               
+                               Render8Colors_Line(&cmd, p, p2, scan_line);
 #if defined(FIXED_FRAMEBUFFER_SIZE)
-                                                       p2 += 8;
+                               //CopyDrawnData(p, p2, 80, scan_line);
 #endif
+                       }
+               }
+               if(ff) force_update = false;
+               return;
+       }
+#if defined(_FM77L4)
+       if(display_mode == DISPLAY_MODE_1_400L) {
+               int ii;
+               uint8_t *regs = l4crtc->get_regs();
+               cursor_start = (int)(regs[10] & 0x1f);
+               cursor_end = (int)(regs[11] & 0x1f);
+               cursor_type = (int)((regs[10] & 0x60) >> 5);
+               text_xmax = (int)((uint16_t)regs[1] << 1);
+               text_lines = (int)((regs[9] & 0x1f) + 1);
+               text_ymax = (int)(regs[6] & 0x7f);
+               yoff = 0;
+               // Green display had only connected to FM-8, FM-7/NEW7 and FM-77.
+               for(y = 0; y < 400; y += 8) {
+                       bool renderf = false;
+                       uint32_t naddr;
+                       uint8_t bitcode;
+                       uint8_t charcode;
+                       uint8_t attr_code;
+                       scrntype_t on_color;
+                       int xlim, ylim;
+                       bool do_green;
+                       if((y & 0x0f) == 0) {
+                               for(yy = 0; yy < 16; yy++) renderf |= vram_draw_table[y + yy];
+                               renderf = renderf | ff;
+                               if(renderf) {
+                                       for(yy = 0; yy < 16; yy++) vram_draw_table[y + yy] = true;
+                               }
+                       }
+                       if(use_green_monitor) {
+                               for(yy = 0; yy < 8; yy++) {
+                                       if(!(vram_draw_table[y + yy] | ff)) continue;
+                                       vram_draw_table[y + yy] = false;
+                                       p = emu->get_screen_buffer(y + yy);
+                                       if(p == NULL) continue;
+                                       yoff = (y + yy) * 80;
+                                       for(x = 0; x < 10; x++) {
+                                               for(ii = 0; ii < 8; ii++) {
+                                                       GETVRAM_1_400L_GREEN(yoff + ii, p);
+                                                       p += 8;
+                                               }
+                                               yoff += 8;
+                                       }
+                               }
+                               do_green = true;
+                       } else {
+                               for(yy = 0; yy < 8; yy++) {
+                                       if(!(vram_draw_table[y + yy] | ff)) continue;
+                                       vram_draw_table[y + yy] = false;
+                                       p = emu->get_screen_buffer(y + yy);
+                                       if(p == NULL) continue;
+                                       yoff = (y + yy) * 80;
+                                       for(x = 0; x < 10; x++) {
+                                               for(ii = 0; ii < 8; ii++) {
+                                                       GETVRAM_1_400L(yoff + ii, p);
                                                        p += 8;
                                                }
                                                yoff += 8;
                                        }
                                }
+                               do_green = false;
+                       }
+                       // Draw Text
+                       if(renderf) {
+                               bool reverse;
+                               bool display_char;
+                               int raster;
+                               bool cursor_rev;
+                               uint8_t bitdata;
+                               if(text_width40) {
+                                       xlim = 40;
+                               } else {
+                                       xlim = 80;
+                               }
+                               
+                               for(x = 0; x < xlim; x++) {
+                                       naddr = (text_start_addr.w.l + ((y / text_lines) * text_xmax + x) * 2) & 0x0ffe;
+                                       charcode = text_vram[naddr];
+                                       attr_code = text_vram[naddr + 1];
+                                               
+                                       on_color = GETVRAM_TEXTCOLOR(attr_code, do_green);
+                                       
+                                       display_char = ((attr_code & 0x10) == 0);
+                                       reverse = ((attr_code & 0x08) != 0);
+                                       
+                                       for(yy = 0; yy < 16; yy++) {
+                                               raster = y % text_lines;
+                                               bitdata = 0x00;
+                                               p = emu->get_screen_buffer(y + yy);
+                                               if(p == NULL) continue;
+                                               if((raster < 16) && (display_char || text_blink)) {
+                                                       bitdata = subsys_cg_l4[(uint32_t)charcode * 16 + (uint32_t)raster];
+                                               }
+                                               cursor_rev = false;
+                                               if((naddr == (uint32_t)(cursor_addr.w.l)) && (cursor_type != 1) &&
+                                                  (text_blink || (cursor_type == 0))) {
+                                                       if((raster >= cursor_start) && (raster <= cursor_end)) {
+                                                               cursor_rev = true;
+                                                       }
+                                               }
+                                               bitdata = GETVRAM_TEXTPIX(bitdata, reverse, cursor_rev);
+                                               if(bitdata != 0) {
+                                                       if(text_width40) {
+                                                                       scrntype_t *pp = &(p[x * 2]); 
+                                                                       for(ii = 0; ii < 8; ii++) {
+                                                                               if((bitdata & 0x80) != 0) {
+                                                                                       p[0] = on_color;
+                                                                                       p[1] = on_color;
+                                                                               }
+                                                                               bitdata <<= 1;
+                                                                               p += 2;
+                                                                       }                                                                               
+                                                       } else {
+                                                               scrntype_t *pp = &(p[x * 2]); 
+                                                               for(ii = 0; ii < 8; ii++) {
+                                                                       if((bitdata & 0x80) != 0) {
+                                                                               p[0] = on_color;
+                                                                       }
+                                                                       bitdata <<= 1;
+                                                                       p += 1;
+                                                               }                                                                               
+                                                       }
+                                               }
+                                       }
+                               }
                        }
-                  
                }
                if(ff) force_update = false;
                return;
        }
+#endif
 # if defined(_FM77AV_VARIANTS)
        if(display_mode == DISPLAY_MODE_4096) {
                uint32_t mask = 0;
@@ -301,9 +476,28 @@ void DISPLAY::draw_screen2()
        }
 #  if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
        else if(display_mode == DISPLAY_MODE_8_400L) {
+               _render_command_data_t cmd;
                int ii;
                yoff = 0;
-               //rgbmask = ~multimode_dispmask;
+               cmd.palette = dpalette_pixel;
+               for(int i = 0; i < 3; i++) {
+                       cmd.data[i] = gvram_shadow;
+                       cmd.baseaddress[i] = i * 0x8000;
+                       cmd.voffset[i] = yoff;
+                       cmd.is_render[i] = false;
+               }
+               if(!multimode_dispflags[0]) cmd.is_render[0] = true;
+               if(!multimode_dispflags[1]) cmd.is_render[1] = true;
+               if(!multimode_dispflags[2]) cmd.is_render[2] = true;
+               cmd.bit_trans_table[0] = (_bit_trans_table_t*)(&(bit_trans_table_2[0][0])); // B
+               cmd.bit_trans_table[1] = (_bit_trans_table_t*)(&(bit_trans_table_1[0][0])); // R
+               cmd.bit_trans_table[2] = (_bit_trans_table_t*)(&(bit_trans_table_0[0][0])); // G
+               cmd.xzoom = 1;
+               cmd.addrmask = 0x7fff;
+               cmd.addrmask2 = 0x7fff;
+               cmd.begin_pos = 0;
+               cmd.shift = 5;
+               cmd.render_width = 80;
                for(y = 0; y < 400; y += 8) {
                        for(yy = 0; yy < 8; yy++) {
                                if(!(vram_draw_table[y + yy] | ff)) continue;
@@ -313,27 +507,97 @@ void DISPLAY::draw_screen2()
                                if(p == NULL) continue;
                                pp = p;
                                yoff = (y + yy) * 80;
+                               for(int i = 0; i < 3; i++) {
+                                       cmd.voffset[i] = yoff;
+                               }
+                               int dpage;
+                               bool window_inv = false;
+                               uint32_t yoff_d;
+                               dpage = vram_display_block;
 #    if defined(_FM77AV40EX) || defined(_FM77AV40SX)
-                               if(window_opened && (wy_low <= (y + yy)) && (wy_high  > (y + yy))) {
-                                       for(x = 0; x < 80; x++) {
-                                               if((x >= wx_begin) && (x < wx_end)) {
-                                                       GETVRAM_8_400L(yoff, p, true);
-                                               } else {
-                                                       GETVRAM_8_400L(yoff, p, false);
+                               if(window_opened && (wy_low <= (y + yy)) && (wy_high > (y + yy))) {
+                                       if((wx_begin > 0) && (wx_begin < wx_end) && (wx_begin < 80)) {
+                                               // Window : left
+                                               cmd.begin_pos = 0;
+                                               window_inv = false;
+                                               int _wend = wx_end;
+                                               if(_wend >= 80) _wend = 80;
+                                               cmd.render_width = wx_begin;
+                                               yoff_d = (dpage != 0) ? 0x18000 : 0x00000;
+                                               for(int i = 0; i < 3; i++) {
+                                                       cmd.baseaddress[i] = yoff_d + (i * 0x8000);
                                                }
-                                               p += 8;
-                                               yoff++;
-                                       }
-                               } else
-#    endif
-                                       for(x = 0; x < 10; x++) {
+                                               if(cmd.render_width > 0) {
+                                                       if(cmd.render_width > 80) cmd.render_width = 80;
+                                               }
+                                               Render8Colors_Line(&cmd, p, NULL, false);
 
-                                               for(ii = 0; ii < 8; ii++) {
-                                                       GETVRAM_8_400L(yoff + ii, p);
-                                                       p += 8;
+                                               // Center
+                                               cmd.begin_pos = wx_begin;
+                                               cmd.render_width = _wend - wx_begin;
+                                               yoff_d = (dpage != 0) ? 0x00000 : 0x18000;
+                                               if(display_page_bak == 1) yoff_d += 0xc000;
+                                               for(int i = 0; i < 3; i++) {
+                                                       cmd.baseaddress[i] = yoff_d + (i * 0x8000);
                                                }
-                                               yoff += 8;
-                                       }
+                                               if(cmd.render_width > 0) {
+                                                       if(cmd.render_width > 80) cmd.render_width = 80;
+                                               }
+                                               Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), NULL, false);
+                                               // Right
+                                               if(wx_end < 80) {
+                                                       cmd.begin_pos = wx_end;
+                                                       cmd.render_width = 80 - wx_end;
+                                                       yoff_d = (dpage != 0) ? 0x18000 : 0x00000;
+                                                       for(int i = 0; i < 3; i++) {
+                                                               cmd.baseaddress[i] = yoff_d + (i * 0x8000);
+                                                       }
+                                                       if(cmd.render_width > 0) {
+                                                               if(cmd.render_width > 80) cmd.render_width = 80;
+                                                       }
+                                                       Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), NULL, false);
+                                               }
+                                               continue;
+                                       } else if((wx_begin <= 0) && (wx_begin < wx_end) && (wx_end >= 0)) {
+                                               // Left
+                                               cmd.begin_pos = 0;
+                                               cmd.render_width = wx_end;
+                                               yoff_d = (dpage != 0) ? 0x00000 : 0x18000;
+                                               for(int i = 0; i < 3; i++) {
+                                                       cmd.baseaddress[i] = yoff_d + (i * 0x8000);
+                                               }
+                                               if(cmd.render_width > 0) {
+                                                       if(cmd.render_width > 80) cmd.render_width = 80;
+                                               }
+                                               if(cmd.render_width > 0) Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), NULL, false);
+                                               // Right
+                                               if(wx_end < 80) {
+                                                       cmd.begin_pos = wx_end;
+                                                       cmd.render_width = 80 - wx_end;
+                                                       yoff_d = (dpage != 0) ? 0x18000 : 0x00000;
+                                                       for(int i = 0; i < 3; i++) {
+                                                               cmd.baseaddress[i] = yoff_d + (i * 0x8000);
+                                                       }
+                                                       if(cmd.render_width > 0) {
+                                                               if(cmd.render_width > 80) cmd.render_width = 80;
+                                                       }
+                                                       Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), NULL, false);
+                                               }
+                                               continue;
+                                       } 
+                               }
+#    endif
+                               // Not Opened
+                               cmd.begin_pos = 0;
+                               cmd.render_width = 80;
+                               yoff_d = (dpage != 0) ? 0x18000 : 0x00000;
+                               for(int i = 0; i < 3; i++) {
+                                       cmd.baseaddress[i] = yoff_d + (i * 0x8000);
+                               }
+                               if(cmd.render_width > 0) {
+                                       if(cmd.render_width > 80) cmd.render_width = 80;
+                               }
+                               Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), NULL, false);
                        }
                }
                if(ff) force_update = false;
@@ -396,153 +660,132 @@ void DISPLAY::reset_screen_update(void)
        screen_update_flag = false;
 }
 
-void DISPLAY::GETVRAM_8_200L(int yoff, scrntype_t *p,
-                                                        scrntype_t *px,
-                                                        bool window_inv,
-                                                        bool scan_line)
+void DISPLAY::CopyDrawnData(scrntype_t* src, scrntype_t* dst, int width, bool scan_line) // Copies `width` scrntype_vec8_t vectors from src to dst; with scan_line set, each vector is shifted right and masked before it is stored.
 {
-       uint8_t b, r, g;
-       uint32_t yoff_d;
-#if defined(_FM77AV40EX) || defined(_FM77AV40SX)
-       int dpage = vram_display_block;
+       if(dst == NULL) return; // no destination: nothing to do
+       if(src == NULL) return; // no source: nothing to do
+#if defined(_RGB555) || defined(_RGB565) || defined(_RGBA565) // 16bit; _RGB565 is tested so this agrees with the _RGB565 scanline branches elsewhere in this file
+       static const int shift_factor = 2;
+#else // 24bit
+       static const int shift_factor = 3;
 #endif
-       if(p == NULL) return;
-       yoff_d = 0;
-       yoff_d = (yoff + yoff_d) & 0x3fff;
-
-#if defined(_FM77AV40EX) || defined(_FM77AV40SX)
-       if(window_inv) {
-               if(dpage == 0) {
-                       dpage = 1;
-               } else {
-                       dpage = 0;
+       scrntype_vec8_t* vsrc = (scrntype_vec8_t*)__builtin_assume_aligned(src, sizeof(scrntype_vec8_t)); // caller must pass src aligned to sizeof(scrntype_vec8_t)
+       scrntype_vec8_t* vdst = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t)); // caller must pass dst aligned to sizeof(scrntype_vec8_t)
+       __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; // working copy of one vector
+       __DECL_ALIGNED(32) scrntype_vec8_t sline; // per-lane mask applied in scan_line mode
+       
+       if(scan_line) {
+__DECL_VECTORIZED_LOOP
+               for(int i = 0; i < 8; i++) {
+                       sline.w[i] = (scrntype_t)RGBA_COLOR(31, 31, 31, 255); // same mask value in all 8 lanes
+               }
+__DECL_VECTORIZED_LOOP
+               for(int i = 0; i < width; i++) {
+                       tmp_dd.v = vsrc[i].v;
+                       tmp_dd.v = tmp_dd.v >> shift_factor; // shift every lane right ...
+                       tmp_dd.v = tmp_dd.v & sline.v; // ... then keep only the bits allowed by the mask
+                       vdst[i].v = tmp_dd.v;
+               }
+       } else {
+__DECL_VECTORIZED_LOOP
+               for(int i = 0; i < width; i++) {
+                       tmp_dd.v = vsrc[i].v; // plain vector copy
+                       vdst[i].v = tmp_dd.v;
                }
        }
-       if(dpage != 0) yoff_d += 0x18000;
-#endif
-       b = r = g = 0;
-#if defined(_FM77AV_VARIANTS)
-       if(display_page_bak == 1) yoff_d += 0xc000;
-#endif
-       if(!multimode_dispflags[0]) b = gvram_shadow[yoff_d + 0x00000];
-       if(!multimode_dispflags[1]) r = gvram_shadow[yoff_d + 0x04000];
-       if(!multimode_dispflags[2]) g = gvram_shadow[yoff_d + 0x08000];
+}
 
-       uint16_t *pg = &(bit_trans_table_0[g][0]);
-       uint16_t *pr = &(bit_trans_table_1[r][0]);
-       uint16_t *pb = &(bit_trans_table_2[b][0]);
-       uint16_t tmp_d[8];
-       scrntype_t tmp_dd[8];
 
-__DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 8; i++) {
-               tmp_d[i] = pr[i];
-               tmp_d[i]  = tmp_d[i] | pg[i];
-               tmp_d[i]  = tmp_d[i] | pb[i];
-               tmp_d[i] = tmp_d[i] >> 5;
-       }
+#if defined(_FM77L4)
+scrntype_t DISPLAY::GETVRAM_TEXTCOLOR(uint8_t attr, bool do_green) // Converts a text attribute byte into a pixel value; do_green selects the green-scale mapping instead of RGB.
+{
+       int color = attr & 0x07; // low 3 bits of attr select the color
+       int r, g, b;
 
-__DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 8; i++) {
-               tmp_dd[i] = dpalette_pixel[tmp_d[i]];
-       }
-#if defined(FIXED_FRAMEBUFFER_SIZE)
-       if(scan_line) {
-/* Fancy scanline */
-       #if defined(_RGB555) || defined(_RGBA565)
-               static const int shift_factor = 2;
-       #else // 24bit
-               static const int shift_factor = 3;
-       #endif
-__DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 8; i++) {
-                       p[i] = tmp_dd[i];
-                       tmp_dd[i] = (tmp_dd[i] >> shift_factor) & (const scrntype_t)RGBA_COLOR(31, 31, 31, 255);;
-                       px[i] = tmp_dd[i];
+       static const int green_g_table[16] = {0, 24, 48, 64, 80, 96, 112, 128, // green level for color indexes 0..7
+                                                                                 140, 155, 175, 186, 210, 220, 240, 255}; // green level for color indexes 8..15
+       if(do_green) {
+               if((attr & 0x20) != 0) color += 8; // attr bit 5 selects the upper half of the 16-entry table
+               r = b = 0;
+               g = green_g_table[color];
+               if(color >= 10) { // indexes 10..15 also get red and blue added, 16 per step above 9
+                       r = (color - 9) * 16;
+                       b = (color - 9) * 16;
                }
        } else {
-__DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 8; i++) {
-                       p[i] = tmp_dd[i];
-                       px[i] = tmp_dd[i];
+               if((attr & 0x20) != 0) { // attr bit 5 set: components are 255 or 0
+                       g = ((color & 4) != 0) ? 255 : 0; // bit 2 -> green
+                       r = ((color & 2) != 0) ? 255 : 0; // bit 1 -> red
+                       b = ((color & 1) != 0) ? 255 : 0; // bit 0 -> blue
+               } else { // attr bit 5 clear: components are 128 or 0
+                       g = ((color & 4) != 0) ? 128 : 0;
+                       r = ((color & 2) != 0) ? 128 : 0;
+                       b = ((color & 1) != 0) ? 128 : 0;
                }
        }
-#else
-__DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 8; i++) {
-               p[i] = tmp_dd[i];
+       return RGBA_COLOR(r, g, b, 255); // alpha argument is always 255
+}
+
+uint8_t DISPLAY::GETVRAM_TEXTPIX(uint8_t bitdata, bool reverse, bool cursor_rev) // Returns bitdata with all bits inverted once for each flag that is set.
+{
+       uint8_t ret = bitdata;
+       if(reverse) {
+               ret = (uint8_t)(~ret); // first inversion, requested by `reverse`
        }
-#endif 
+       if(cursor_rev) {
+           ret = (uint8_t)(~ret); // second inversion, requested by `cursor_rev`; with both flags set the result equals bitdata
+       }
+       return ret;
 }
 
-#if defined(USE_GREEN_DISPLAY)
-void DISPLAY::GETVRAM_8_200L_GREEN(int yoff, scrntype_t *p,
-                                                        scrntype_t *px,
-                                                        bool window_inv,
-                                                        bool scan_line)
+void DISPLAY::GETVRAM_1_400L(int yoff, scrntype_t *p) // Expands the gvram_shadow byte at yoff into 8 pixels via dpalette_pixel and stores them at p.
 {
-       uint8_t b, r, g;
+       uint8_t pixel;
        uint32_t yoff_d;
-#if defined(_FM77AV40EX) || defined(_FM77AV40SX)
-       int dpage = vram_display_block;
-#endif
        if(p == NULL) return;
-       yoff_d = 0;
-       yoff_d = (yoff + yoff_d) & 0x3fff;
-
-       b = r = g = 0;
-       if(!multimode_dispflags[0]) b = gvram_shadow[yoff_d + 0x00000];
-       if(!multimode_dispflags[1]) r = gvram_shadow[yoff_d + 0x04000];
-       if(!multimode_dispflags[2]) g = gvram_shadow[yoff_d + 0x08000];
-
-       uint16_t *pg = &(bit_trans_table_0[g][0]);
-       uint16_t *pr = &(bit_trans_table_1[r][0]);
-       uint16_t *pb = &(bit_trans_table_2[b][0]);
-       uint16_t tmp_d[8];
-       scrntype_t tmp_dd[8];
+       yoff_d = yoff & 0x7fff; // wrap the offset into 0..0x7fff
+       pixel = gvram_shadow[yoff_d]; // one source byte
+       uint16_vec8_t *ppx = (uint16_vec8_t *)__builtin_assume_aligned(&(bit_trans_table_0[pixel][0]), 16); // table row for this byte, read as one 8 x uint16 vector; rows are assumed 16-byte aligned
+       __DECL_ALIGNED(16) uint16_vec8_t tmp_d; // per-pixel palette indexes
+       __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; // per-pixel output colors
+       scrntype_vec8_t *vp = (scrntype_vec8_t *)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t)); // caller must pass p aligned to sizeof(scrntype_vec8_t)
 
+       tmp_d.v = ppx->v;
+       tmp_d.v = tmp_d.v >> 5; // NOTE(review): presumably reduces each table entry to a dpalette_pixel index - confirm against the bit_trans_table_0 layout
+       
 __DECL_VECTORIZED_LOOP
        for(int i = 0; i < 8; i++) {
-               tmp_d[i] = pr[i];
-               tmp_d[i]  = tmp_d[i] | pg[i];
-               tmp_d[i]  = tmp_d[i] | pb[i];
-               tmp_d[i] = tmp_d[i] >> 5;
+               tmp_dd.w[i] = dpalette_pixel[tmp_d.w[i]]; // palette lookup, one lane per pixel
        }
 
+       vp->v = tmp_dd.v; // store all 8 pixels with a single vector write
+}
+
+void DISPLAY::GETVRAM_1_400L_GREEN(int yoff, scrntype_t *p) // Same expansion as GETVRAM_1_400L but looks colors up in dpalette_pixel_green.
+{
+       uint8_t pixel;
+       uint32_t yoff_d;
+       if(p == NULL) return;
+       yoff_d = yoff & 0x7fff; // wrap the offset into 0..0x7fff
+       pixel = gvram_shadow[yoff_d]; // one source byte
+       uint16_vec8_t *ppx = (uint16_vec8_t *)__builtin_assume_aligned(&(bit_trans_table_0[pixel][0]), 16); // table row for this byte, read as one 8 x uint16 vector; rows are assumed 16-byte aligned
+       __DECL_ALIGNED(16) uint16_vec8_t tmp_d; // per-pixel palette indexes
+       __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; // per-pixel output colors
+       scrntype_vec8_t *vp = (scrntype_vec8_t *)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t)); // caller must pass p aligned to sizeof(scrntype_vec8_t)
+
+       tmp_d.v = ppx->v;
+       tmp_d.v = tmp_d.v >> 5; // NOTE(review): presumably reduces each table entry to a palette index - confirm against the bit_trans_table_0 layout
+       
 __DECL_VECTORIZED_LOOP
        for(int i = 0; i < 8; i++) {
-               tmp_dd[i] = dpalette_pixel_green[tmp_d[i]];
-       }
-#if defined(FIXED_FRAMEBUFFER_SIZE)
-       if(scan_line) {
-/* Fancy scanline */
-       #if defined(_RGB555) || defined(_RGBA565)
-               static const int shift_factor = 2;
-       #else // 24bit
-               static const int shift_factor = 3;
-       #endif
-__DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 8; i++) {
-                       p[i] = tmp_dd[i];
-                       tmp_dd[i] = (tmp_dd[i] >> shift_factor) & (const scrntype_t)RGBA_COLOR(31, 31, 31, 255);;
-                       px[i] = tmp_dd[i];
-               }
-       } else {
-__DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 8; i++) {
-                       p[i] = tmp_dd[i];
-                       px[i] = tmp_dd[i];
-               }
-       }
-#else
-__DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 8; i++) {
-               p[i] = tmp_dd[i];
+               tmp_dd.w[i] = dpalette_pixel_green[tmp_d.w[i]]; // green palette lookup, one lane per pixel
        }
-#endif 
+       vp->v = tmp_dd.v; // store all 8 pixels with a single vector write
+
 }
 #endif
 
+
 #if defined(_FM77AV_VARIANTS)
 void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px,
                                                   uint32_t mask,
@@ -551,8 +794,8 @@ void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px,
 {
        uint32_t b3, r3, g3;
        uint8_t  bb[4], rr[4], gg[4];
-       uint16_t pixels[8];
-       const uint16_t __masks[8] = {(uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask};
+       __DECL_ALIGNED(16) uint16_vec8_t pixels;
+       __DECL_ALIGNED(16) const uint16_t __masks[8] = {(uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask};
        scrntype_t b, r, g;
        uint32_t idx;;
        scrntype_t pixel;
@@ -582,157 +825,113 @@ void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px,
        bb[1] = gvram_shadow[yoff_d1 + 0x02000];
        rr[0] = gvram_shadow[yoff_d1 + 0x04000];
        rr[1] = gvram_shadow[yoff_d1 + 0x06000];
-               
        gg[0] = gvram_shadow[yoff_d1 + 0x08000];
        gg[1] = gvram_shadow[yoff_d1 + 0x0a000];
                
        bb[2] = gvram_shadow[yoff_d2 + 0x0c000];
        bb[3] = gvram_shadow[yoff_d2 + 0x0e000];
-               
        rr[2] = gvram_shadow[yoff_d2 + 0x10000];
        rr[3] = gvram_shadow[yoff_d2 + 0x12000];
        gg[2] = gvram_shadow[yoff_d2 + 0x14000];
        gg[3] = gvram_shadow[yoff_d2 + 0x16000];
 
-       uint16_t tmp_g[8], tmp_r[8], tmp_b[8];
        uint16_t *p0, *p1, *p2, *p3;
 #if !defined(FIXED_FRAMEBUFFER_SIZE)
-       scrntype_t tmp_dd[8];
+       __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[8];
 #else
-       scrntype_t tmp_dd[16];
+       __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[16];
 #endif
+       __DECL_ALIGNED(16) uint16_vec8_t tmp_g, tmp_r, tmp_b;
+       __v8hi *vp0, *vp1, *vp2, *vp3;
        // G
-       p0 = &(bit_trans_table_0[gg[0]][0]);
-       p1 = &(bit_trans_table_1[gg[1]][0]);
-       p2 = &(bit_trans_table_2[gg[2]][0]);
-       p3 = &(bit_trans_table_3[gg[3]][0]);
-__DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 8; i++) {
-               tmp_g[i]  = p0[i];
-               tmp_g[i]  = tmp_g[i] | p1[i];
-               tmp_g[i]  = tmp_g[i] | p2[i];
-               tmp_g[i]  = tmp_g[i] | p3[i];
-       }
+       vp0 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_0[gg[0]][0]), 16);
+       vp1 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_1[gg[1]][0]), 16);
+       vp2 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_2[gg[2]][0]), 16);
+       vp3 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_3[gg[3]][0]), 16);
+       tmp_g.v = *vp0;
+       tmp_g.v = tmp_g.v | *vp1;
+       tmp_g.v = tmp_g.v | *vp2;
+       tmp_g.v = tmp_g.v | *vp3;
        // R
-       p0 = &(bit_trans_table_0[rr[0]][0]);
-       p1 = &(bit_trans_table_1[rr[1]][0]);
-       p2 = &(bit_trans_table_2[rr[2]][0]);
-       p3 = &(bit_trans_table_3[rr[3]][0]);
-__DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 8; i++) {
-               tmp_r[i]  = p0[i];
-               tmp_r[i]  = tmp_r[i] | p1[i];
-               tmp_r[i]  = tmp_r[i] | p2[i];
-               tmp_r[i]  = tmp_r[i] | p3[i];
-       }
+       vp0 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_0[rr[0]][0]), 16);
+       vp1 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_1[rr[1]][0]), 16);
+       vp2 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_2[rr[2]][0]), 16);
+       vp3 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_3[rr[3]][0]), 16);
+       tmp_r.v = *vp0;
+       tmp_r.v = tmp_r.v | *vp1;
+       tmp_r.v = tmp_r.v | *vp2;
+       tmp_r.v = tmp_r.v | *vp3;
+
        // B
-       p0 = &(bit_trans_table_0[bb[0]][0]);
-       p1 = &(bit_trans_table_1[bb[1]][0]);
-       p2 = &(bit_trans_table_2[bb[2]][0]);
-       p3 = &(bit_trans_table_3[bb[3]][0]);
-__DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 8; i++) {
-               tmp_b[i]  = p0[i];
-               tmp_b[i]  = tmp_b[i] | p1[i];
-               tmp_b[i]  = tmp_b[i] | p2[i];
-               tmp_b[i]  = tmp_b[i] | p3[i];
-       }
-__DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 8; i++) {
-               tmp_g[i] <<= 4;
-               pixels[i] = tmp_b[i] >> 4;
-               pixels[i] = pixels[i] | tmp_r[i];
-               pixels[i] = pixels[i] | tmp_g[i];
-               pixels[i] = pixels[i] & __masks[i];
-       }
-       //for(int i = 0; i < 8; i++) {
-       //      pixels[i] = pixels[i] & mask;
-       //}
+       vp0 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_0[bb[0]][0]), 16);
+       vp1 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_1[bb[1]][0]), 16);
+       vp2 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_2[bb[2]][0]), 16);
+       vp3 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_3[bb[3]][0]), 16);
+       tmp_b.v = *vp0;
+       tmp_b.v = tmp_b.v | *vp1;
+       tmp_b.v = tmp_b.v | *vp2;
+       tmp_b.v = tmp_b.v | *vp3;
+       
+       __v8hi *mp = (__v8hi*)__masks;
+       tmp_g.v = tmp_g.v << 4;
+       tmp_b.v = tmp_b.v >> 4;
+       pixels.v = tmp_b.v;
+       pixels.v = pixels.v | tmp_r.v;
+       pixels.v = pixels.v | tmp_g.v;
+       pixels.v = pixels.v & *mp;
+       
+
+       scrntype_vec8_t *vp = (scrntype_vec8_t*)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t));
+       scrntype_vec8_t *dp = (scrntype_vec8_t*)__builtin_assume_aligned(tmp_dd, sizeof(scrntype_vec8_t));
 #if !defined(FIXED_FRAMEBUFFER_SIZE)
 __DECL_VECTORIZED_LOOP
        for(int i = 0; i < 8; i++) {
                tmp_dd[i] = analog_palette_pixel[pixels[i]];
-               p[i] = tmp_dd[i];
        }
+       vp->v = dp->v;
 #else
 __DECL_VECTORIZED_LOOP
        for(int i = 0; i < 8; i++) {
-               tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = analog_palette_pixel[pixels[i]];;
+               tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = analog_palette_pixel[pixels.w[i]];;
        }
+       scrntype_vec8_t *vpx = (scrntype_vec8_t*)__builtin_assume_aligned(px, sizeof(scrntype_vec8_t));
+       __DECL_ALIGNED(32) scrntype_vec8_t vmask;
 __DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 16; i++) {
-               p[i] = tmp_dd[i];
+       for(int i = 0; i < 2; i++) {
+               vp[i].v = dp[i].v;
        }
        if(scan_line) {
 /* Fancy scanline */
 __DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 16; i++) {
+               for(int i = 0; i < 2; i++) {
 #if defined(_RGB888) || defined(_RGBA888)
-                       tmp_dd[i] = tmp_dd[i] >> 3;
+                       dp[i].v = dp[i].v >> 3;
 #elif defined(_RGB555)
-                       tmp_dd[i] = tmp_dd[i] >> 2;
+                       dp[i].v = dp[i].v >> 2;
 #elif defined(_RGB565)
-                       tmp_dd[i] = tmp_dd[i] >> 2;
+                       dp[i].v = dp[i].v >> 2;
 #endif
                }
 __DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 16; i++) {
-                       tmp_dd[i] = tmp_dd[i] & (const scrntype_t)RGBA_COLOR(31, 31, 31, 255);
-                       px[i] = tmp_dd[i];
+               for(int i = 0; i < 8; i++) {
+                       vmask.w[i] = (const scrntype_t)RGBA_COLOR(31, 31, 31, 255);
+               }
+__DECL_VECTORIZED_LOOP
+               for(int i = 0; i < 2; i++) {
+                       dp[i].v = dp[i].v & vmask.v; 
+                       vpx[i].v = dp[i].v;
                }
        } else {
 __DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 16; i++) {
-                       px[i] = tmp_dd[i];
+               for(int i = 0; i < 2; i++) {
+                       vpx[i].v = dp[i].v;
                }
        }
-#endif
-
+#endif 
 }
 #endif
 
 #if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
-void DISPLAY::GETVRAM_8_400L(int yoff, scrntype_t *p,
-                                                        bool window_inv)
-{
-       uint8_t b, r, g;
-       uint32_t dot;
-       uint32_t yoff_d;
-# if defined(_FM77AV40EX) || defined(_FM77AV40SX)
-       int dpage = vram_display_block;
-# endif
-       if(p == NULL) return;
-       yoff_d = yoff;
-# if defined(_FM77AV40EX) || defined(_FM77AV40SX)
-       if(window_inv) {
-               if(dpage == 0) {
-                       dpage = 1;
-               } else {
-                       dpage = 0;
-               }
-       }
-       if(dpage != 0) yoff_d += 0x18000;
-# endif
-       b = r = g = 0;
-       if(!multimode_dispflags[0]) b = gvram_shadow[yoff_d + 0x00000];
-       if(!multimode_dispflags[1]) r = gvram_shadow[yoff_d + 0x08000];
-       if(!multimode_dispflags[2]) g = gvram_shadow[yoff_d + 0x10000];
-
-       uint16_t *pg = &(bit_trans_table_0[g][0]);
-       uint16_t *pr = &(bit_trans_table_1[r][0]);
-       uint16_t *pb = &(bit_trans_table_2[b][0]);
-       uint16_t tmp_d[8];
-
-__DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 8; i++) {
-               tmp_d[i] = pg[i] | pr[i] | pb[i];
-               tmp_d[i] = tmp_d[i] >> 5;
-       }               
-__DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 8; i++) {
-               p[i] = dpalette_pixel[tmp_d[i]];
-       }
-}
 
 void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, scrntype_t *px, bool scan_line)
 {
@@ -757,12 +956,15 @@ void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, scrntype_t *px, bool scan_li
        yoff_d2 = yoff;
 
        uint8_t  bb[8], rr[8], gg[8];
-       uint16_t *p0, *p1, *p2, *p3, *p4, *p5;
-       uint32_t _btmp[8], _rtmp[8], _gtmp[8];
+
+       __DECL_ALIGNED(16) uint16_vec8_t _btmp;
+       __DECL_ALIGNED(16) uint16_vec8_t _rtmp;
+       __DECL_ALIGNED(16) uint16_vec8_t _gtmp;
+       uint16_vec8_t *vp0, *vp1, *vp2, *vp3, *vp4, *vp5;
 #if !defined(FIXED_FRAMEBUFFER_SIZE)
-       scrntype_t tmp_dd[8];
+       __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[8];
 #else
-       scrntype_t tmp_dd[16];
+       __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[16];
 #endif
 //     if(mask & 0x01) {
        if(!multimode_dispflags[0]) {
@@ -776,25 +978,22 @@ void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, scrntype_t *px, bool scan_li
                bb[4] = gvram_shadow[yoff_d1 + 0x18000];
                bb[5] = gvram_shadow[yoff_d1 + 0x1a000];
                
-               p0 = &(bit_trans_table_0[bb[0]][0]);
-               p1 = &(bit_trans_table_1[bb[1]][0]);
-               p2 = &(bit_trans_table_2[bb[2]][0]);
-               p3 = &(bit_trans_table_3[bb[3]][0]);
-               p4 = &(bit_trans_table_4[bb[4]][0]);
-               p5 = &(bit_trans_table_5[bb[5]][0]);
-__DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 8; i++) {
-                       _btmp[i] = p0[i];
-                       _btmp[i] = _btmp[i] | p1[i];
-                       _btmp[i] = _btmp[i] | p2[i];
-                       _btmp[i] = _btmp[i] | p3[i];
-                       _btmp[i] = _btmp[i] | p4[i];
-                       _btmp[i] = _btmp[i] | p5[i];
-               }
+               vp0 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_0[bb[0]][0]), 16);
+               vp1 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_1[bb[1]][0]), 16);
+               vp2 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_2[bb[2]][0]), 16);
+               vp3 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_3[bb[3]][0]), 16);
+               vp4 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_4[bb[4]][0]), 16);
+               vp5 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_5[bb[5]][0]), 16);
+               _btmp.v = vp0->v;
+               _btmp.v = _btmp.v | vp1->v;
+               _btmp.v = _btmp.v | vp2->v;
+               _btmp.v = _btmp.v | vp3->v;
+               _btmp.v = _btmp.v | vp4->v;
+               _btmp.v = _btmp.v | vp5->v;
        } else {
 __DECL_VECTORIZED_LOOP
                for(int i = 0; i < 8; i++) {
-                       _btmp[i] = 0;
+                       _btmp.w[i] = 0;
                }
        }
        if(!multimode_dispflags[1]) {
@@ -809,25 +1008,22 @@ __DECL_VECTORIZED_LOOP
                rr[4] = gvram_shadow[yoff_d1 + 0x1c000];
                rr[5] = gvram_shadow[yoff_d1 + 0x1e000];
                
-               p0 = &(bit_trans_table_0[rr[0]][0]);
-               p1 = &(bit_trans_table_1[rr[1]][0]);
-               p2 = &(bit_trans_table_2[rr[2]][0]);
-               p3 = &(bit_trans_table_3[rr[3]][0]);
-               p4 = &(bit_trans_table_4[rr[4]][0]);
-               p5 = &(bit_trans_table_5[rr[5]][0]);
-__DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 8; i++) {
-                       _rtmp[i] = p0[i];
-                       _rtmp[i] = _rtmp[i] | p1[i];
-                       _rtmp[i] = _rtmp[i] | p2[i];
-                       _rtmp[i] = _rtmp[i] | p3[i];
-                       _rtmp[i] = _rtmp[i] | p4[i];
-                       _rtmp[i] = _rtmp[i] | p5[i];
-               }
+               vp0 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_0[rr[0]][0]), 16);
+               vp1 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_1[rr[1]][0]), 16);
+               vp2 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_2[rr[2]][0]), 16);
+               vp3 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_3[rr[3]][0]), 16);
+               vp4 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_4[rr[4]][0]), 16);
+               vp5 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_5[rr[5]][0]), 16);
+               _rtmp.v = vp0->v;
+               _rtmp.v = _rtmp.v | vp1->v;
+               _rtmp.v = _rtmp.v | vp2->v;
+               _rtmp.v = _rtmp.v | vp3->v;
+               _rtmp.v = _rtmp.v | vp4->v;
+               _rtmp.v = _rtmp.v | vp5->v;
        } else {
 __DECL_VECTORIZED_LOOP
                for(int i = 0; i < 8; i++) {
-                       _rtmp[i] = 0;
+                       _rtmp.w[i] = 0;
                }
        }
        if(!multimode_dispflags[2]) {
@@ -842,65 +1038,73 @@ __DECL_VECTORIZED_LOOP
                gg[4] = gvram_shadow[yoff_d1 + 0x20000];
                gg[5] = gvram_shadow[yoff_d1 + 0x22000];
                
-               p0 = &(bit_trans_table_0[gg[0]][0]);
-               p1 = &(bit_trans_table_1[gg[1]][0]);
-               p2 = &(bit_trans_table_2[gg[2]][0]);
-               p3 = &(bit_trans_table_3[gg[3]][0]);
-               p4 = &(bit_trans_table_4[gg[4]][0]);
-               p5 = &(bit_trans_table_5[gg[5]][0]);
-__DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 8; i++) {
-                       _gtmp[i] = p0[i];
-                       _gtmp[i] = _gtmp[i] | p1[i];
-                       _gtmp[i] = _gtmp[i] | p2[i];
-                       _gtmp[i] = _gtmp[i] | p3[i];
-                       _gtmp[i] = _gtmp[i] | p4[i];
-                       _gtmp[i] = _gtmp[i] | p5[i];
-               }
+               vp0 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_0[gg[0]][0]), 16);
+               vp1 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_1[gg[1]][0]), 16);
+               vp2 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_2[gg[2]][0]), 16);
+               vp3 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_3[gg[3]][0]), 16);
+               vp4 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_4[gg[4]][0]), 16);
+               vp5 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_5[gg[5]][0]), 16);
+               _gtmp.v = vp0->v;
+               _gtmp.v = _gtmp.v | vp1->v;
+               _gtmp.v = _gtmp.v | vp2->v;
+               _gtmp.v = _gtmp.v | vp3->v;
+               _gtmp.v = _gtmp.v | vp4->v;
+               _gtmp.v = _gtmp.v | vp5->v;
        } else {
 __DECL_VECTORIZED_LOOP
                for(int i = 0; i < 8; i++) {
-                       _gtmp[i] = 0;
+                       _gtmp.w[i] = 0;
                }
        }
+
+       scrntype_vec8_t* vpp = (scrntype_vec8_t*)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t));
+       scrntype_vec8_t* dp = (scrntype_vec8_t*)tmp_dd;
 #if !defined(FIXED_FRAMEBUFFER_SIZE)
 __DECL_VECTORIZED_LOOP
        for(int i = 0; i < 8; i++) {
-               tmp_dd[i] = RGB_COLOR(_rtmp[i], _gtmp[i], _btmp[i]);
-               p[i] = tmp_dd[i];
+               tmp_dd[i] = RGB_COLOR(_rtmp.w[i], _gtmp.w[i], _btmp.w[i]);
        }
+       vpp->v = dp->v;
 #else
 __DECL_VECTORIZED_LOOP
        for(int i = 0; i < 8; i++) {
-               tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = RGB_COLOR(_rtmp[i], _gtmp[i], _btmp[i]);
+               tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = RGB_COLOR(_rtmp.w[i], _gtmp.w[i], _btmp.w[i]);
        }
+
 __DECL_VECTORIZED_LOOP
-       for(int i = 0; i < 16; i++) {
-               p[i] = tmp_dd[i];
+       for(int i = 0; i < 2; i++) {
+               vpp[i].v = dp[i].v;
        }
+       scrntype_vec8_t* vpx = (scrntype_vec8_t*)__builtin_assume_aligned(px, sizeof(scrntype_vec8_t));
        if(scan_line) {
 /* Fancy scanline */
 __DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 16; i++) {
+               for(int i = 0; i < 2; i++) {
 #if defined(_RGB888) || defined(_RGBA888)
-                       tmp_dd[i] = tmp_dd[i] >> 3;
+                       dp[i].v = dp[i].v >> 3;
 #elif defined(_RGB555)
-                       tmp_dd[i] = tmp_dd[i] >> 2;
+                       dp[i].v = dp[i].v >> 2;
 #elif defined(_RGB565)
-                       tmp_dd[i] = tmp_dd[i] >> 2;
+                       dp[i].v = dp[i].v >> 2;
 #endif
                }
+               __DECL_ALIGNED(32) scrntype_vec8_t scanline_data;
 __DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 16; i++) {
-                       tmp_dd[i] = tmp_dd[i] & RGBA_COLOR(31, 31, 31, 256);
-                       px[i] = tmp_dd[i];
+               for(int i = 0; i < 8; i++) {
+                       scanline_data.w[i] = RGBA_COLOR(31, 31, 31, 255);
                }
-       } else {
 __DECL_VECTORIZED_LOOP
-               for(int i = 0; i < 16; i++) {
-                       px[i] = tmp_dd[i];
+               for(int i = 0; i < 2; i++) {
+                       dp[i].v = dp[i].v & scanline_data.v;
+                       vpx[i].v = dp[i].v;
+               }
+       } else {
+               for(int i = 0; i < 2; i++) {
+                       vpx[i].v = dp[i].v;
                }
        }
 #endif 
 }
 #endif
+
+}