X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=source%2Fsrc%2Fvm%2Ffm7%2Fvram.cpp;h=95e98ccb70f84beb64716b64dccdc12cfd5466bc;hb=825bf2ca6d1fdd3b866e72eaf7e6db32a4a704c4;hp=e70546109b843d717bdc78ef2b7252aae7536228;hpb=023a9aaed0eed332a65d618b7aa815aeecf05ea5;p=csp-qt%2Fcommon_source_project-fm7.git diff --git a/source/src/vm/fm7/vram.cpp b/source/src/vm/fm7/vram.cpp index e70546109..95e98ccb7 100644 --- a/source/src/vm/fm7/vram.cpp +++ b/source/src/vm/fm7/vram.cpp @@ -8,55 +8,13 @@ #include "vm.h" #include "emu.h" #include "fm7_display.h" -#if defined(_OPENMP) -#include -#endif - -uint8_t DISPLAY::read_vram_l4_400l(uint32_t addr, uint32_t offset) -{ #if defined(_FM77L4) - if(addr < 0x8000) { - if(workram) { - uint32_t raddr = addr & 0x3fff; - if((multimode_accessmask & 0x04) == 0) { - return gvram[0x8000 + (raddr + offset) & 0x7fff]; - } - return 0xff; - } - pagemod = addr & 0x4000; - return gvram[((addr + offset) & mask) | pagemod]; - } else if(addr < 0x9800) { - return textvram[addr & 0x0fff]; - } else { // $9800-$bfff - return subrom_l4[addr - 0x9800]; - } +#include "../hd46505.h" #endif - return 0xff; -} - -void DISPLAY::write_vram_l4_400l(uint32_t addr, uint32_t offset, uint32_t data) -{ -#if defined(_FM77L4) - if(addr < 0x8000) { - if(workram) { - uint32_t raddr = addr & 0x3fff; - if((multimode_accessmask & 0x04) == 0) { - gvram[0x8000 + (raddr + offset) & 0x7fff] = (uint8_t)data; - } - return; - } - pagemod = addr & 0x4000; - gvram[((addr + offset) & mask) | pagemod] = (uint8_t)data; - } else if(addr < 0x9800) { - textvram[addr & 0x0fff] = (uint8_t)data; - } else { // $9800-$bfff - //return subrom_l4[addr - 0x9800]; - } - return; -#endif -} +extern config_t config; +namespace FM7 { void DISPLAY::draw_screen() { @@ -65,7 +23,6 @@ void DISPLAY::draw_screen() //#endif } -extern config_t config; void DISPLAY::draw_screen2() { int y; @@ -155,42 +112,37 @@ void DISPLAY::draw_screen2() if(!(vram_wrote_shadow | ff)) return; vram_wrote_shadow = false; if(display_mode == DISPLAY_MODE_8_200L) { + _render_command_data_t cmd; + uint32_t yoff_d = 0; int ii; yoff = 0; -#ifdef USE_GREEN_DISPLAY - if((config.dipswitch & FM7_DIPSW_GREEN_DISPLAY) != 0) { - // Green display had only connected to FM-8, FM-7/NEW7 and FM-77. - for(y = 0; y < 200; y += 8) { - for(yy = 0; yy < 8; yy++) { - if(!(vram_draw_table[y + yy] | ff)) continue; - vram_draw_table[y + yy] = false; -#if !defined(FIXED_FRAMEBUFFER_SIZE) - p = emu->get_screen_buffer(y + yy); - p2 = NULL; +#if defined(USE_GREEN_DISPLAY) + if(use_green_monitor) { + cmd.palette = dpalette_pixel_green; + } else { + cmd.palette = dpalette_pixel; + } #else - p = emu->get_screen_buffer((y + yy) * 2); - p2 = emu->get_screen_buffer((y + yy) * 2 + 1); -#endif - if(p == NULL) continue; - yoff = (y + yy) * 80; - { - for(x = 0; x < 10; x++) { - for(ii = 0; ii < 8; ii++) { - GETVRAM_8_200L_GREEN(yoff + ii, p, p2, false, scan_line); -#if defined(FIXED_FRAMEBUFFER_SIZE) - p2 += 8; -#endif - p += 8; - } - yoff += 8; - } - } - } - } - if(ff) force_update = false; - return; + cmd.palette = dpalette_pixel; +#endif + for(int i = 0; i < 3; i++) { + cmd.data[i] = gvram_shadow; + cmd.baseaddress[i] = i * 0x4000; + cmd.voffset[i] = yoff; + cmd.is_render[i] = false; } -#endif + if(!multimode_dispflags[0]) cmd.is_render[0] = true; + if(!multimode_dispflags[1]) cmd.is_render[1] = true; + if(!multimode_dispflags[2]) cmd.is_render[2] = true; + cmd.bit_trans_table[0] = (_bit_trans_table_t*)(&(bit_trans_table_2[0][0])); // B + cmd.bit_trans_table[1] = (_bit_trans_table_t*)(&(bit_trans_table_1[0][0])); // R + cmd.bit_trans_table[2] = (_bit_trans_table_t*)(&(bit_trans_table_0[0][0])); // G + cmd.xzoom = 1; + cmd.addrmask = 0x3fff; + cmd.addrmask2 = 0x3fff; + cmd.begin_pos = 0; + cmd.shift = 5; + cmd.render_width = 80; for(y = 0; y < 200; y += 8) { for(yy = 0; yy < 8; yy++) { @@ -205,40 +157,263 @@ void DISPLAY::draw_screen2() #endif if(p == NULL) continue; yoff = (y + yy) * 80; + for(int i = 0; i < 3; i++) { + cmd.voffset[i] = yoff; + } + # if defined(_FM77AV40EX) || defined(_FM77AV40SX) + int dpage; + dpage = vram_display_block; + bool window_inv = false; if(window_opened && (wy_low <= (y + yy)) && (wy_high > (y + yy))) { - for(x = 0; x < 80; x++) { - if((x >= wx_begin) && (x < wx_end)) { - GETVRAM_8_200L(yoff, p, p2, true, scan_line); - } else { - GETVRAM_8_200L(yoff, p, p2, false, scan_line); + if((wx_begin > 0) && (wx_begin < wx_end) && (wx_begin < 80)) { + // Window : left + cmd.begin_pos = 0; + window_inv = false; + int _wend = wx_end; + if(_wend >= 80) _wend = 80; + cmd.render_width = wx_begin; + yoff_d = (dpage != 0) ? 0x18000 : 0x00000; +#if defined(_FM77AV_VARIANTS) + if(display_page_bak == 1) yoff_d += 0xc000; +#endif + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x4000); + } + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + Render8Colors_Line(&cmd, p, p2, scan_line); + + // Center + cmd.begin_pos = wx_begin; + cmd.render_width = _wend - wx_begin; + yoff_d = (dpage != 0) ? 0x00000 : 0x18000; +#if defined(_FM77AV_VARIANTS) + if(display_page_bak == 1) yoff_d += 0xc000; +#endif + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x4000); + } + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), &(p2[cmd.begin_pos * 8]) , scan_line); + // Right + if(wx_end < 80) { + cmd.begin_pos = wx_end; + cmd.render_width = 80 - wx_end; + yoff_d = (dpage != 0) ? 0x18000 : 0x00000; +#if defined(_FM77AV_VARIANTS) + if(display_page_bak == 1) yoff_d += 0xc000; +#endif + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x4000); + } + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), &(p2[cmd.begin_pos * 8]), scan_line); } #if defined(FIXED_FRAMEBUFFER_SIZE) - p2 += 8; + //CopyDrawnData(p, p2, 80, scan_line); #endif - p += 8; - yoff++; + continue; + } else if((wx_begin <= 0) && (wx_begin < wx_end) && (wx_end >= 0)) { + // Left + cmd.begin_pos = 0; + cmd.render_width = wx_end; + yoff_d = (dpage != 0) ? 0x00000 : 0x18000; +#if defined(_FM77AV_VARIANTS) + if(display_page_bak == 1) yoff_d += 0xc000; +#endif + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x4000); + } + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + if(cmd.render_width > 0) Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), &(p2[cmd.begin_pos * 8]), scan_line); + // Right + if(wx_end < 80) { + cmd.begin_pos = wx_end; + cmd.render_width = 80 - wx_end; + yoff_d = (dpage != 0) ? 0x18000 : 0x00000; +#if defined(_FM77AV_VARIANTS) + if(display_page_bak == 1) yoff_d += 0xc000; +#endif + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x4000); + } + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), &(p2[cmd.begin_pos * 8]), scan_line); + } +#if defined(FIXED_FRAMEBUFFER_SIZE) +// CopyDrawnData(p, p2, 80, scan_line); +#endif + continue; } - } else -# endif - { - for(x = 0; x < 10; x++) { - for(ii = 0; ii < 8; ii++) { - GETVRAM_8_200L(yoff + ii, p, p2, false, scan_line); + } +#endif + //cmd.begin_pos = 0; + //cmd.render_width = 80; +# if defined(_FM77AV40EX) || defined(_FM77AV40SX) + yoff_d = (dpage != 0) ? 0x18000 : 0x00000; +#else +// yoff_d = 0; +#endif +#if defined(_FM77AV_VARIANTS) + if(display_page_bak == 1) yoff_d += 0xc000; + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x4000); + } +#else +// for(int i = 0; i < 3; i++) { +// cmd.baseaddress[i] = i * 0x4000; +// } +#endif + + Render8Colors_Line(&cmd, p, p2, scan_line); #if defined(FIXED_FRAMEBUFFER_SIZE) - p2 += 8; + //CopyDrawnData(p, p2, 80, scan_line); #endif + } + } + if(ff) force_update = false; + return; + } +#if defined(_FM77L4) + if(display_mode == DISPLAY_MODE_1_400L) { + int ii; + uint8_t *regs = l4crtc->get_regs(); + cursor_start = (int)(regs[10] & 0x1f); + cursor_end = (int)(regs[11] & 0x1f); + cursor_type = (int)((regs[10] & 0x60) >> 5); + text_xmax = (int)((uint16_t)regs[1] << 1); + text_lines = (int)((regs[9] & 0x1f) + 1); + text_ymax = (int)(regs[6] & 0x7f); + yoff = 0; + // Green display had only connected to FM-8, FM-7/NEW7 and FM-77. + for(y = 0; y < 400; y += 8) { + bool renderf = false; + uint32_t naddr; + uint8_t bitcode; + uint8_t charcode; + uint8_t attr_code; + scrntype_t on_color; + int xlim, ylim; + bool do_green; + if((y & 0x0f) == 0) { + for(yy = 0; yy < 16; yy++) renderf |= vram_draw_table[y + yy]; + renderf = renderf | ff; + if(renderf) { + for(yy = 0; yy < 16; yy++) vram_draw_table[y + yy] = true; + } + } + if(use_green_monitor) { + for(yy = 0; yy < 8; yy++) { + if(!(vram_draw_table[y + yy] | ff)) continue; + vram_draw_table[y + yy] = false; + p = emu->get_screen_buffer(y + yy); + if(p == NULL) continue; + yoff = (y + yy) * 80; + for(x = 0; x < 10; x++) { + for(ii = 0; ii < 8; ii++) { + GETVRAM_1_400L_GREEN(yoff + ii, p); + p += 8; + } + yoff += 8; + } + } + do_green = true; + } else { + for(yy = 0; yy < 8; yy++) { + if(!(vram_draw_table[y + yy] | ff)) continue; + vram_draw_table[y + yy] = false; + p = emu->get_screen_buffer(y + yy); + if(p == NULL) continue; + yoff = (y + yy) * 80; + for(x = 0; x < 10; x++) { + for(ii = 0; ii < 8; ii++) { + GETVRAM_1_400L(yoff + ii, p); p += 8; } yoff += 8; } } + do_green = false; + } + // Draw Text + if(renderf) { + bool reverse; + bool display_char; + int raster; + bool cursor_rev; + uint8_t bitdata; + if(text_width40) { + xlim = 40; + } else { + xlim = 80; + } + + for(x = 0; x < xlim; x++) { + naddr = (text_start_addr.w.l + ((y / text_lines) * text_xmax + x) * 2) & 0x0ffe; + charcode = text_vram[naddr]; + attr_code = text_vram[naddr + 1]; + + on_color = GETVRAM_TEXTCOLOR(attr_code, do_green); + + display_char = ((attr_code & 0x10) == 0); + reverse = ((attr_code & 0x08) != 0); + + for(yy = 0; yy < 16; yy++) { + raster = y % text_lines; + bitdata = 0x00; + p = emu->get_screen_buffer(y + yy); + if(p == NULL) continue; + if((raster < 16) && (display_char || text_blink)) { + bitdata = subsys_cg_l4[(uint32_t)charcode * 16 + (uint32_t)raster]; + } + cursor_rev = false; + if((naddr == (uint32_t)(cursor_addr.w.l)) && (cursor_type != 1) && + (text_blink || (cursor_type == 0))) { + if((raster >= cursor_start) && (raster <= cursor_end)) { + cursor_rev = true; + } + } + bitdata = GETVRAM_TEXTPIX(bitdata, reverse, cursor_rev); + if(bitdata != 0) { + if(text_width40) { + scrntype_t *pp = &(p[x * 2]); + for(ii = 0; ii < 8; ii++) { + if((bitdata & 0x80) != 0) { + p[0] = on_color; + p[1] = on_color; + } + bitdata <<= 1; + p += 2; + } + } else { + scrntype_t *pp = &(p[x * 2]); + for(ii = 0; ii < 8; ii++) { + if((bitdata & 0x80) != 0) { + p[0] = on_color; + } + bitdata <<= 1; + p += 1; + } + } + } + } + } } - } if(ff) force_update = false; return; } +#endif # if defined(_FM77AV_VARIANTS) if(display_mode == DISPLAY_MODE_4096) { uint32_t mask = 0; @@ -301,9 +476,28 @@ void DISPLAY::draw_screen2() } # if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX) else if(display_mode == DISPLAY_MODE_8_400L) { + _render_command_data_t cmd; int ii; yoff = 0; - //rgbmask = ~multimode_dispmask; + cmd.palette = dpalette_pixel; + for(int i = 0; i < 3; i++) { + cmd.data[i] = gvram_shadow; + cmd.baseaddress[i] = i * 0x8000; + cmd.voffset[i] = yoff; + cmd.is_render[i] = false; + } + if(!multimode_dispflags[0]) cmd.is_render[0] = true; + if(!multimode_dispflags[1]) cmd.is_render[1] = true; + if(!multimode_dispflags[2]) cmd.is_render[2] = true; + cmd.bit_trans_table[0] = (_bit_trans_table_t*)(&(bit_trans_table_2[0][0])); // B + cmd.bit_trans_table[1] = (_bit_trans_table_t*)(&(bit_trans_table_1[0][0])); // R + cmd.bit_trans_table[2] = (_bit_trans_table_t*)(&(bit_trans_table_0[0][0])); // G + cmd.xzoom = 1; + cmd.addrmask = 0x7fff; + cmd.addrmask2 = 0x7fff; + cmd.begin_pos = 0; + cmd.shift = 5; + cmd.render_width = 80; for(y = 0; y < 400; y += 8) { for(yy = 0; yy < 8; yy++) { if(!(vram_draw_table[y + yy] | ff)) continue; @@ -313,27 +507,97 @@ void DISPLAY::draw_screen2() if(p == NULL) continue; pp = p; yoff = (y + yy) * 80; + for(int i = 0; i < 3; i++) { + cmd.voffset[i] = yoff; + } + int dpage; + bool window_inv = false; + uint32_t yoff_d; + dpage = vram_display_block; # if defined(_FM77AV40EX) || defined(_FM77AV40SX) - if(window_opened && (wy_low <= (y + yy)) && (wy_high > (y + yy))) { - for(x = 0; x < 80; x++) { - if((x >= wx_begin) && (x < wx_end)) { - GETVRAM_8_400L(yoff, p, true); - } else { - GETVRAM_8_400L(yoff, p, false); + if(window_opened && (wy_low <= (y + yy)) && (wy_high > (y + yy))) { + if((wx_begin > 0) && (wx_begin < wx_end) && (wx_begin < 80)) { + // Window : left + cmd.begin_pos = 0; + window_inv = false; + int _wend = wx_end; + if(_wend >= 80) _wend = 80; + cmd.render_width = wx_begin; + yoff_d = (dpage != 0) ? 0x18000 : 0x00000; + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x8000); } - p += 8; - yoff++; - } - } else -# endif - for(x = 0; x < 10; x++) { + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + Render8Colors_Line(&cmd, p, NULL, false); - for(ii = 0; ii < 8; ii++) { - GETVRAM_8_400L(yoff + ii, p); - p += 8; + // Center + cmd.begin_pos = wx_begin; + cmd.render_width = _wend - wx_begin; + yoff_d = (dpage != 0) ? 0x00000 : 0x18000; + if(display_page_bak == 1) yoff_d += 0xc000; + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x8000); } - yoff += 8; - } + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), NULL, false); + // Right + if(wx_end < 80) { + cmd.begin_pos = wx_end; + cmd.render_width = 80 - wx_end; + yoff_d = (dpage != 0) ? 0x18000 : 0x00000; + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x8000); + } + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), NULL, false); + } + continue; + } else if((wx_begin <= 0) && (wx_begin < wx_end) && (wx_end >= 0)) { + // Left + cmd.begin_pos = 0; + cmd.render_width = wx_end; + yoff_d = (dpage != 0) ? 0x00000 : 0x18000; + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x8000); + } + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + if(cmd.render_width > 0) Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), NULL, false); + // Right + if(wx_end < 80) { + cmd.begin_pos = wx_end; + cmd.render_width = 80 - wx_end; + yoff_d = (dpage != 0) ? 0x18000 : 0x00000; + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x8000); + } + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), NULL, false); + } + continue; + } + } +# endif + // Not Opened + cmd.begin_pos = 0; + cmd.render_width = 80; + yoff_d = (dpage != 0) ? 0x18000 : 0x00000; + for(int i = 0; i < 3; i++) { + cmd.baseaddress[i] = yoff_d + (i * 0x8000); + } + if(cmd.render_width > 0) { + if(cmd.render_width > 80) cmd.render_width = 80; + } + Render8Colors_Line(&cmd, &(p[cmd.begin_pos * 8]), NULL, false); } } if(ff) force_update = false; @@ -396,153 +660,132 @@ void DISPLAY::reset_screen_update(void) screen_update_flag = false; } -void DISPLAY::GETVRAM_8_200L(int yoff, scrntype_t *p, - scrntype_t *px, - bool window_inv, - bool scan_line) +void DISPLAY::CopyDrawnData(scrntype_t* src, scrntype_t* dst, int width, bool scan_line) { - uint8_t b, r, g; - uint32_t yoff_d; -#if defined(_FM77AV40EX) || defined(_FM77AV40SX) - int dpage = vram_display_block; + if(dst == NULL) return; + if(src == NULL) return; +#if defined(_RGB555) || defined(_RGBA565) + static const int shift_factor = 2; +#else // 24bit + static const int shift_factor = 3; #endif - if(p == NULL) return; - yoff_d = 0; - yoff_d = (yoff + yoff_d) & 0x3fff; - -#if defined(_FM77AV40EX) || defined(_FM77AV40SX) - if(window_inv) { - if(dpage == 0) { - dpage = 1; - } else { - dpage = 0; + scrntype_vec8_t* vsrc = (scrntype_vec8_t*)__builtin_assume_aligned(src, sizeof(scrntype_vec8_t)); + scrntype_vec8_t* vdst = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t)); + __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; + __DECL_ALIGNED(32) scrntype_vec8_t sline; + + if(scan_line) { +__DECL_VECTORIZED_LOOP + for(int i = 0; i < 8; i++) { + sline.w[i] = (scrntype_t)RGBA_COLOR(31, 31, 31, 255); + } +__DECL_VECTORIZED_LOOP + for(int i = 0; i < width; i++) { + tmp_dd.v = vsrc[i].v; + tmp_dd.v = tmp_dd.v >> shift_factor; + tmp_dd.v = tmp_dd.v & sline.v; + vdst[i].v = tmp_dd.v; + } + } else { +__DECL_VECTORIZED_LOOP + for(int i = 0; i < width; i++) { + tmp_dd.v = vsrc[i].v; + vdst[i].v = tmp_dd.v; } } - if(dpage != 0) yoff_d += 0x18000; -#endif - b = r = g = 0; -#if defined(_FM77AV_VARIANTS) - if(display_page_bak == 1) yoff_d += 0xc000; -#endif - if(!multimode_dispflags[0]) b = gvram_shadow[yoff_d + 0x00000]; - if(!multimode_dispflags[1]) r = gvram_shadow[yoff_d + 0x04000]; - if(!multimode_dispflags[2]) g = gvram_shadow[yoff_d + 0x08000]; +} - uint16_t *pg = &(bit_trans_table_0[g][0]); - uint16_t *pr = &(bit_trans_table_1[r][0]); - uint16_t *pb = &(bit_trans_table_2[b][0]); - uint16_t tmp_d[8]; - scrntype_t tmp_dd[8]; -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - tmp_d[i] = pr[i]; - tmp_d[i] = tmp_d[i] | pg[i]; - tmp_d[i] = tmp_d[i] | pb[i]; - tmp_d[i] = tmp_d[i] >> 5; - } +#if defined(_FM77L4) +scrntype_t DISPLAY::GETVRAM_TEXTCOLOR(uint8_t attr, bool do_green) +{ + int color = attr & 0x07; + int r, g, b; -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - tmp_dd[i] = dpalette_pixel[tmp_d[i]]; - } -#if defined(FIXED_FRAMEBUFFER_SIZE) - if(scan_line) { -/* Fancy scanline */ - #if defined(_RGB555) || defined(_RGBA565) - static const int shift_factor = 2; - #else // 24bit - static const int shift_factor = 3; - #endif -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - p[i] = tmp_dd[i]; - tmp_dd[i] = (tmp_dd[i] >> shift_factor) & (const scrntype_t)RGBA_COLOR(31, 31, 31, 255);; - px[i] = tmp_dd[i]; + static const int green_g_table[16] = {0, 24, 48, 64, 80, 96, 112, 128, + 140, 155, 175, 186, 210, 220, 240, 255}; + if(do_green) { + if((attr & 0x20) != 0) color += 8; + r = b = 0; + g = green_g_table[color]; + if(color >= 10) { + r = (color - 9) * 16; + b = (color - 9) * 16; } } else { -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - p[i] = tmp_dd[i]; - px[i] = tmp_dd[i]; + if((attr & 0x20) != 0) { + g = ((color & 4) != 0) ? 255 : 0; + r = ((color & 2) != 0) ? 255 : 0; + b = ((color & 1) != 0) ? 255 : 0; + } else { + g = ((color & 4) != 0) ? 128 : 0; + r = ((color & 2) != 0) ? 128 : 0; + b = ((color & 1) != 0) ? 128 : 0; } } -#else -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - p[i] = tmp_dd[i]; + return RGBA_COLOR(r, g, b, 255); +} + +uint8_t DISPLAY::GETVRAM_TEXTPIX(uint8_t bitdata, bool reverse, bool cursor_rev) +{ + uint8_t ret = bitdata; + if(reverse) { + ret = (uint8_t)(~ret); } -#endif + if(cursor_rev) { + ret = (uint8_t)(~ret); + } + return ret; } -#if defined(USE_GREEN_DISPLAY) -void DISPLAY::GETVRAM_8_200L_GREEN(int yoff, scrntype_t *p, - scrntype_t *px, - bool window_inv, - bool scan_line) +void DISPLAY::GETVRAM_1_400L(int yoff, scrntype_t *p) { - uint8_t b, r, g; + uint8_t pixel; uint32_t yoff_d; -#if defined(_FM77AV40EX) || defined(_FM77AV40SX) - int dpage = vram_display_block; -#endif if(p == NULL) return; - yoff_d = 0; - yoff_d = (yoff + yoff_d) & 0x3fff; - - b = r = g = 0; - if(!multimode_dispflags[0]) b = gvram_shadow[yoff_d + 0x00000]; - if(!multimode_dispflags[1]) r = gvram_shadow[yoff_d + 0x04000]; - if(!multimode_dispflags[2]) g = gvram_shadow[yoff_d + 0x08000]; - - uint16_t *pg = &(bit_trans_table_0[g][0]); - uint16_t *pr = &(bit_trans_table_1[r][0]); - uint16_t *pb = &(bit_trans_table_2[b][0]); - uint16_t tmp_d[8]; - scrntype_t tmp_dd[8]; + yoff_d = yoff & 0x7fff; + pixel = gvram_shadow[yoff_d]; + uint16_vec8_t *ppx = (uint16_vec8_t *)__builtin_assume_aligned(&(bit_trans_table_0[pixel][0]), 16); + __DECL_ALIGNED(16) uint16_vec8_t tmp_d; + __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; + scrntype_vec8_t *vp = (scrntype_vec8_t *)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t)); + tmp_d.v = ppx->v; + tmp_d.v = tmp_d.v >> 5; + __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { - tmp_d[i] = pr[i]; - tmp_d[i] = tmp_d[i] | pg[i]; - tmp_d[i] = tmp_d[i] | pb[i]; - tmp_d[i] = tmp_d[i] >> 5; + tmp_dd.w[i] = dpalette_pixel[tmp_d.w[i]]; } + vp->v = tmp_dd.v; +} + +void DISPLAY::GETVRAM_1_400L_GREEN(int yoff, scrntype_t *p) +{ + uint8_t pixel; + uint32_t yoff_d; + if(p == NULL) return; + yoff_d = yoff & 0x7fff; + pixel = gvram_shadow[yoff_d]; + uint16_vec8_t *ppx = (uint16_vec8_t *)__builtin_assume_aligned(&(bit_trans_table_0[pixel][0]), 16); + __DECL_ALIGNED(16) uint16_vec8_t tmp_d; + __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; + scrntype_vec8_t *vp = (scrntype_vec8_t *)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t)); + + tmp_d.v = ppx->v; + tmp_d.v = tmp_d.v >> 5; + __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { - tmp_dd[i] = dpalette_pixel_green[tmp_d[i]]; - } -#if defined(FIXED_FRAMEBUFFER_SIZE) - if(scan_line) { -/* Fancy scanline */ - #if defined(_RGB555) || defined(_RGBA565) - static const int shift_factor = 2; - #else // 24bit - static const int shift_factor = 3; - #endif -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - p[i] = tmp_dd[i]; - tmp_dd[i] = (tmp_dd[i] >> shift_factor) & (const scrntype_t)RGBA_COLOR(31, 31, 31, 255);; - px[i] = tmp_dd[i]; - } - } else { -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - p[i] = tmp_dd[i]; - px[i] = tmp_dd[i]; - } - } -#else -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - p[i] = tmp_dd[i]; + tmp_dd.w[i] = dpalette_pixel_green[tmp_d.w[i]]; } -#endif + vp->v = tmp_dd.v; + } #endif + #if defined(_FM77AV_VARIANTS) void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px, uint32_t mask, @@ -551,8 +794,8 @@ void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px, { uint32_t b3, r3, g3; uint8_t bb[4], rr[4], gg[4]; - uint16_t pixels[8]; - const uint16_t __masks[8] = {(uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask}; + __DECL_ALIGNED(16) uint16_vec8_t pixels; + __DECL_ALIGNED(16) const uint16_t __masks[8] = {(uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask}; scrntype_t b, r, g; uint32_t idx;; scrntype_t pixel; @@ -582,157 +825,113 @@ void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px, bb[1] = gvram_shadow[yoff_d1 + 0x02000]; rr[0] = gvram_shadow[yoff_d1 + 0x04000]; rr[1] = gvram_shadow[yoff_d1 + 0x06000]; - gg[0] = gvram_shadow[yoff_d1 + 0x08000]; gg[1] = gvram_shadow[yoff_d1 + 0x0a000]; bb[2] = gvram_shadow[yoff_d2 + 0x0c000]; bb[3] = gvram_shadow[yoff_d2 + 0x0e000]; - rr[2] = gvram_shadow[yoff_d2 + 0x10000]; rr[3] = gvram_shadow[yoff_d2 + 0x12000]; gg[2] = gvram_shadow[yoff_d2 + 0x14000]; gg[3] = gvram_shadow[yoff_d2 + 0x16000]; - uint16_t tmp_g[8], tmp_r[8], tmp_b[8]; uint16_t *p0, *p1, *p2, *p3; #if !defined(FIXED_FRAMEBUFFER_SIZE) - scrntype_t tmp_dd[8]; + __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[8]; #else - scrntype_t tmp_dd[16]; + __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[16]; #endif + __DECL_ALIGNED(16) uint16_vec8_t tmp_g, tmp_r, tmp_b; + __v8hi *vp0, *vp1, *vp2, *vp3; // G - p0 = &(bit_trans_table_0[gg[0]][0]); - p1 = &(bit_trans_table_1[gg[1]][0]); - p2 = &(bit_trans_table_2[gg[2]][0]); - p3 = &(bit_trans_table_3[gg[3]][0]); -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - tmp_g[i] = p0[i]; - tmp_g[i] = tmp_g[i] | p1[i]; - tmp_g[i] = tmp_g[i] | p2[i]; - tmp_g[i] = tmp_g[i] | p3[i]; - } + vp0 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_0[gg[0]][0]), 16); + vp1 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_1[gg[1]][0]), 16); + vp2 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_2[gg[2]][0]), 16); + vp3 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_3[gg[3]][0]), 16); + tmp_g.v = *vp0; + tmp_g.v = tmp_g.v | *vp1; + tmp_g.v = tmp_g.v | *vp2; + tmp_g.v = tmp_g.v | *vp3; // R - p0 = &(bit_trans_table_0[rr[0]][0]); - p1 = &(bit_trans_table_1[rr[1]][0]); - p2 = &(bit_trans_table_2[rr[2]][0]); - p3 = &(bit_trans_table_3[rr[3]][0]); -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - tmp_r[i] = p0[i]; - tmp_r[i] = tmp_r[i] | p1[i]; - tmp_r[i] = tmp_r[i] | p2[i]; - tmp_r[i] = tmp_r[i] | p3[i]; - } + vp0 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_0[rr[0]][0]), 16); + vp1 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_1[rr[1]][0]), 16); + vp2 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_2[rr[2]][0]), 16); + vp3 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_3[rr[3]][0]), 16); + tmp_r.v = *vp0; + tmp_r.v = tmp_r.v | *vp1; + tmp_r.v = tmp_r.v | *vp2; + tmp_r.v = tmp_r.v | *vp3; + // B - p0 = &(bit_trans_table_0[bb[0]][0]); - p1 = &(bit_trans_table_1[bb[1]][0]); - p2 = &(bit_trans_table_2[bb[2]][0]); - p3 = &(bit_trans_table_3[bb[3]][0]); -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - tmp_b[i] = p0[i]; - tmp_b[i] = tmp_b[i] | p1[i]; - tmp_b[i] = tmp_b[i] | p2[i]; - tmp_b[i] = tmp_b[i] | p3[i]; - } -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - tmp_g[i] <<= 4; - pixels[i] = tmp_b[i] >> 4; - pixels[i] = pixels[i] | tmp_r[i]; - pixels[i] = pixels[i] | tmp_g[i]; - pixels[i] = pixels[i] & __masks[i]; - } - //for(int i = 0; i < 8; i++) { - // pixels[i] = pixels[i] & mask; - //} + vp0 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_0[bb[0]][0]), 16); + vp1 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_1[bb[1]][0]), 16); + vp2 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_2[bb[2]][0]), 16); + vp3 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_3[bb[3]][0]), 16); + tmp_b.v = *vp0; + tmp_b.v = tmp_b.v | *vp1; + tmp_b.v = tmp_b.v | *vp2; + tmp_b.v = tmp_b.v | *vp3; + + __v8hi *mp = (__v8hi*)__masks; + tmp_g.v = tmp_g.v << 4; + tmp_b.v = tmp_b.v >> 4; + pixels.v = tmp_b.v; + pixels.v = pixels.v | tmp_r.v; + pixels.v = pixels.v | tmp_g.v; + pixels.v = pixels.v & *mp; + + + scrntype_vec8_t *vp = (scrntype_vec8_t*)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t)); + scrntype_vec8_t *dp = (scrntype_vec8_t*)__builtin_assume_aligned(tmp_dd, sizeof(scrntype_vec8_t)); #if !defined(FIXED_FRAMEBUFFER_SIZE) __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { tmp_dd[i] = analog_palette_pixel[pixels[i]]; - p[i] = tmp_dd[i]; } + vp->v = dp->v; #else __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { - tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = analog_palette_pixel[pixels[i]];; + tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = analog_palette_pixel[pixels.w[i]];; } + scrntype_vec8_t *vpx = (scrntype_vec8_t*)__builtin_assume_aligned(px, sizeof(scrntype_vec8_t)); + __DECL_ALIGNED(32) scrntype_vec8_t vmask; __DECL_VECTORIZED_LOOP - for(int i = 0; i < 16; i++) { - p[i] = tmp_dd[i]; + for(int i = 0; i < 2; i++) { + vp[i].v = dp[i].v; } if(scan_line) { /* Fancy scanline */ __DECL_VECTORIZED_LOOP - for(int i = 0; i < 16; i++) { + for(int i = 0; i < 2; i++) { #if defined(_RGB888) || defined(_RGBA888) - tmp_dd[i] = tmp_dd[i] >> 3; + dp[i].v = dp[i].v >> 3; #elif defined(_RGB555) - tmp_dd[i] = tmp_dd[i] >> 2; + dp[i].v = dp[i].v >> 2; #elif defined(_RGB565) - tmp_dd[i] = tmp_dd[i] >> 2; + dp[i].v = dp[i].v >> 2; #endif } __DECL_VECTORIZED_LOOP - for(int i = 0; i < 16; i++) { - tmp_dd[i] = tmp_dd[i] & (const scrntype_t)RGBA_COLOR(31, 31, 31, 255); - px[i] = tmp_dd[i]; + for(int i = 0; i < 8; i++) { + vmask.w[i] = (const scrntype_t)RGBA_COLOR(31, 31, 31, 255); + } +__DECL_VECTORIZED_LOOP + for(int i = 0; i < 2; i++) { + dp[i].v = dp[i].v & vmask.v; + vpx[i].v = dp[i].v; } } else { __DECL_VECTORIZED_LOOP - for(int i = 0; i < 16; i++) { - px[i] = tmp_dd[i]; + for(int i = 0; i < 2; i++) { + vpx[i].v = dp[i].v; } } -#endif - +#endif } #endif #if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX) -void DISPLAY::GETVRAM_8_400L(int yoff, scrntype_t *p, - bool window_inv) -{ - uint8_t b, r, g; - uint32_t dot; - uint32_t yoff_d; -# if defined(_FM77AV40EX) || defined(_FM77AV40SX) - int dpage = vram_display_block; -# endif - if(p == NULL) return; - yoff_d = yoff; -# if defined(_FM77AV40EX) || defined(_FM77AV40SX) - if(window_inv) { - if(dpage == 0) { - dpage = 1; - } else { - dpage = 0; - } - } - if(dpage != 0) yoff_d += 0x18000; -# endif - b = r = g = 0; - if(!multimode_dispflags[0]) b = gvram_shadow[yoff_d + 0x00000]; - if(!multimode_dispflags[1]) r = gvram_shadow[yoff_d + 0x08000]; - if(!multimode_dispflags[2]) g = gvram_shadow[yoff_d + 0x10000]; - - uint16_t *pg = &(bit_trans_table_0[g][0]); - uint16_t *pr = &(bit_trans_table_1[r][0]); - uint16_t *pb = &(bit_trans_table_2[b][0]); - uint16_t tmp_d[8]; - -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - tmp_d[i] = pg[i] | pr[i] | pb[i]; - tmp_d[i] = tmp_d[i] >> 5; - } -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - p[i] = dpalette_pixel[tmp_d[i]]; - } -} void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, scrntype_t *px, bool scan_line) { @@ -757,12 +956,15 @@ void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, scrntype_t *px, bool scan_li yoff_d2 = yoff; uint8_t bb[8], rr[8], gg[8]; - uint16_t *p0, *p1, *p2, *p3, *p4, *p5; - uint32_t _btmp[8], _rtmp[8], _gtmp[8]; + + __DECL_ALIGNED(16) uint16_vec8_t _btmp; + __DECL_ALIGNED(16) uint16_vec8_t _rtmp; + __DECL_ALIGNED(16) uint16_vec8_t _gtmp; + uint16_vec8_t *vp0, *vp1, *vp2, *vp3, *vp4, *vp5; #if !defined(FIXED_FRAMEBUFFER_SIZE) - scrntype_t tmp_dd[8]; + __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[8]; #else - scrntype_t tmp_dd[16]; + __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[16]; #endif // if(mask & 0x01) { if(!multimode_dispflags[0]) { @@ -776,25 +978,22 @@ void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, scrntype_t *px, bool scan_li bb[4] = gvram_shadow[yoff_d1 + 0x18000]; bb[5] = gvram_shadow[yoff_d1 + 0x1a000]; - p0 = &(bit_trans_table_0[bb[0]][0]); - p1 = &(bit_trans_table_1[bb[1]][0]); - p2 = &(bit_trans_table_2[bb[2]][0]); - p3 = &(bit_trans_table_3[bb[3]][0]); - p4 = &(bit_trans_table_4[bb[4]][0]); - p5 = &(bit_trans_table_5[bb[5]][0]); -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - _btmp[i] = p0[i]; - _btmp[i] = _btmp[i] | p1[i]; - _btmp[i] = _btmp[i] | p2[i]; - _btmp[i] = _btmp[i] | p3[i]; - _btmp[i] = _btmp[i] | p4[i]; - _btmp[i] = _btmp[i] | p5[i]; - } + vp0 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_0[bb[0]][0]), 16); + vp1 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_1[bb[1]][0]), 16); + vp2 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_2[bb[2]][0]), 16); + vp3 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_3[bb[3]][0]), 16); + vp4 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_4[bb[4]][0]), 16); + vp5 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_5[bb[5]][0]), 16); + _btmp.v = vp0->v; + _btmp.v = _btmp.v | vp1->v; + _btmp.v = _btmp.v | vp2->v; + _btmp.v = _btmp.v | vp3->v; + _btmp.v = _btmp.v | vp4->v; + _btmp.v = _btmp.v | vp5->v; } else { __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { - _btmp[i] = 0; + _btmp.w[i] = 0; } } if(!multimode_dispflags[1]) { @@ -809,25 +1008,22 @@ __DECL_VECTORIZED_LOOP rr[4] = gvram_shadow[yoff_d1 + 0x1c000]; rr[5] = gvram_shadow[yoff_d1 + 0x1e000]; - p0 = &(bit_trans_table_0[rr[0]][0]); - p1 = &(bit_trans_table_1[rr[1]][0]); - p2 = &(bit_trans_table_2[rr[2]][0]); - p3 = &(bit_trans_table_3[rr[3]][0]); - p4 = &(bit_trans_table_4[rr[4]][0]); - p5 = &(bit_trans_table_5[rr[5]][0]); -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - _rtmp[i] = p0[i]; - _rtmp[i] = _rtmp[i] | p1[i]; - _rtmp[i] = _rtmp[i] | p2[i]; - _rtmp[i] = _rtmp[i] | p3[i]; - _rtmp[i] = _rtmp[i] | p4[i]; - _rtmp[i] = _rtmp[i] | p5[i]; - } + vp0 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_0[rr[0]][0]), 16); + vp1 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_1[rr[1]][0]), 16); + vp2 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_2[rr[2]][0]), 16); + vp3 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_3[rr[3]][0]), 16); + vp4 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_4[rr[4]][0]), 16); + vp5 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_5[rr[5]][0]), 16); + _rtmp.v = vp0->v; + _rtmp.v = _rtmp.v | vp1->v; + _rtmp.v = _rtmp.v | vp2->v; + _rtmp.v = _rtmp.v | vp3->v; + _rtmp.v = _rtmp.v | vp4->v; + _rtmp.v = _rtmp.v | vp5->v; } else { __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { - _rtmp[i] = 0; + _rtmp.w[i] = 0; } } if(!multimode_dispflags[2]) { @@ -842,65 +1038,73 @@ __DECL_VECTORIZED_LOOP gg[4] = gvram_shadow[yoff_d1 + 0x20000]; gg[5] = gvram_shadow[yoff_d1 + 0x22000]; - p0 = &(bit_trans_table_0[gg[0]][0]); - p1 = &(bit_trans_table_1[gg[1]][0]); - p2 = &(bit_trans_table_2[gg[2]][0]); - p3 = &(bit_trans_table_3[gg[3]][0]); - p4 = &(bit_trans_table_4[gg[4]][0]); - p5 = &(bit_trans_table_5[gg[5]][0]); -__DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - _gtmp[i] = p0[i]; - _gtmp[i] = _gtmp[i] | p1[i]; - _gtmp[i] = _gtmp[i] | p2[i]; - _gtmp[i] = _gtmp[i] | p3[i]; - _gtmp[i] = _gtmp[i] | p4[i]; - _gtmp[i] = _gtmp[i] | p5[i]; - } + vp0 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_0[gg[0]][0]), 16); + vp1 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_1[gg[1]][0]), 16); + vp2 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_2[gg[2]][0]), 16); + vp3 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_3[gg[3]][0]), 16); + vp4 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_4[gg[4]][0]), 16); + vp5 = (uint16_vec8_t*)__builtin_assume_aligned(&(bit_trans_table_5[gg[5]][0]), 16); + _gtmp.v = vp0->v; + _gtmp.v = _gtmp.v | vp1->v; + _gtmp.v = _gtmp.v | vp2->v; + _gtmp.v = _gtmp.v | vp3->v; + _gtmp.v = _gtmp.v | vp4->v; + _gtmp.v = _gtmp.v | vp5->v; } else { __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { - _gtmp[i] = 0; + _gtmp.w[i] = 0; } } + + scrntype_vec8_t* vpp = (scrntype_vec8_t*)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t)); + scrntype_vec8_t* dp = (scrntype_vec8_t*)tmp_dd; #if !defined(FIXED_FRAMEBUFFER_SIZE) __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { - tmp_dd[i] = RGB_COLOR(_rtmp[i], _gtmp[i], _btmp[i]); - p[i] = tmp_dd[i]; + tmp_dd[i] = RGB_COLOR(_rtmp.w[i], _gtmp.w[i], _btmp.w[i]); } + vpp->v = dp->v; #else __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { - tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = RGB_COLOR(_rtmp[i], _gtmp[i], _btmp[i]); + tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = RGB_COLOR(_rtmp.w[i], _gtmp.w[i], _btmp.w[i]); } + __DECL_VECTORIZED_LOOP - for(int i = 0; i < 16; i++) { - p[i] = tmp_dd[i]; + for(int i = 0; i < 2; i++) { + vpp[i].v = dp[i].v; } + scrntype_vec8_t* vpx = (scrntype_vec8_t*)__builtin_assume_aligned(px, sizeof(scrntype_vec8_t)); if(scan_line) { /* Fancy scanline */ __DECL_VECTORIZED_LOOP - for(int i = 0; i < 16; i++) { + for(int i = 0; i < 2; i++) { #if defined(_RGB888) || defined(_RGBA888) - tmp_dd[i] = tmp_dd[i] >> 3; + dp[i].v = dp[i].v >> 3; #elif defined(_RGB555) - tmp_dd[i] = tmp_dd[i] >> 2; + dp[i].v = dp[i].v >> 2; #elif defined(_RGB565) - tmp_dd[i] = tmp_dd[i] >> 2; + dp[i].v = dp[i].v >> 2; #endif } + __DECL_ALIGNED(32) scrntype_vec8_t scanline_data; __DECL_VECTORIZED_LOOP - for(int i = 0; i < 16; i++) { - tmp_dd[i] = tmp_dd[i] & RGBA_COLOR(31, 31, 31, 256); - px[i] = tmp_dd[i]; + for(int i = 0; i < 8; i++) { + scanline_data.w[i] = RGBA_COLOR(31, 31, 31, 255); } - } else { __DECL_VECTORIZED_LOOP - for(int i = 0; i < 16; i++) { - px[i] = tmp_dd[i]; + for(int i = 0; i < 2; i++) { + dp[i].v = dp[i].v & scanline_data.v; + vpx[i].v = dp[i].v; + } + } else { + for(int i = 0; i < 2; i++) { + vpx[i].v = dp[i].v; } } #endif } #endif + +}