OSDN Git Service

[VM][X1][DRAW] Fix spending a lot of host CPU usage on draw_screen(). This issue has...
author K.Ohta <whatisthis.sowhat@gmail.com>
Fri, 17 Jul 2020 11:40:10 +0000 (20:40 +0900)
committer K.Ohta <whatisthis.sowhat@gmail.com>
Fri, 17 Jul 2020 11:40:10 +0000 (20:40 +0900)
[VM][X1][DRAW] Set alignment of RAM and some values.

source/src/vm/x1/display.cpp
source/src/vm/x1/display.h

index e61ba5b..47cc61c 100644 (file)
@@ -158,6 +158,9 @@ void DISPLAY::reset()
        mode1 = 0;//3;
        mode2 = 0;
        hireso = true;
+       emu->set_vm_screen_lines(400);
+#else
+       emu->set_vm_screen_lines(200);
 #endif
 #ifdef _X1TURBOZ
        zmode1 = 0;
@@ -584,14 +587,17 @@ void DISPLAY::event_frame()
        hz_disp = regs[1];
        vt_disp = regs[6] & 0x7f;
        st_addr = (regs[12] << 8) | regs[13];
-       
 #ifdef _X1TURBO_FEATURE
        int vt_total = ((regs[4] & 0x7f) + 1) * ch_height + (regs[5] & 0x1f);
-       hireso = (vt_total > 400);
+       bool hireso_old = hireso;
+       hireso = (vt_total >= 400);
 #endif
        int vlen;
 #ifdef _X1TURBO_FEATURE
        vlen = (hireso) ? 400 : 200;
+//     if(hireso_old != hireso) {
+//             emu->set_vm_screen_lines(vlen);
+//     }
 #else
        vlen = 200;
 #endif
@@ -858,7 +864,7 @@ void DISPLAY::draw_screen()
                        draw_line(v);
                }
        }
-       
+
        // copy to real screen
 #ifdef _X1TURBOZ
        dr_zpalette_pc[8 + 0] = dr_zpalette_pc[16 + 0x000];
@@ -870,10 +876,11 @@ void DISPLAY::draw_screen()
        dr_zpalette_pc[8 + 6] = dr_zpalette_pc[16 + 0xff0];
        dr_zpalette_pc[8 + 7] = dr_zpalette_pc[16 + 0xfff];
 #endif
+       __DECL_ALIGNED(16) scrntype_t dbuf[640];
 #ifdef _X1TURBO_FEATURE
        if(hireso) {
                // 400 lines
-               emu->set_vm_screen_lines(400);
+//             emu->set_vm_screen_lines(400);
                if(column40) {
                        // 40 columns
                        for(int y = 0; y < 400; y++) {
@@ -886,7 +893,7 @@ void DISPLAY::draw_screen()
                                        for(int x = 0, x2 = 0; x < 320; x++, x2 += 2) {
                                                uint16_t cg00 = src_cg0[x] | (src_cg0[x] >> 2);
                                                
-                                               dest[x2] = dest[x2 + 1] = get_zpriority(src_text[x], cg00, cg00);
+                                               dbuf[x2] = dbuf[x2 + 1] = get_zpriority(src_text[x], cg00, cg00);
                                        }
                                } else {
 #endif
@@ -894,14 +901,18 @@ void DISPLAY::draw_screen()
                                        
                                        for(int x = 0, x2 = 0; x < 320; x++, x2 += 2) {
 #ifdef _X1TURBOZ
-                                               dest[x2] = dest[x2 + 1] = dr_zpalette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
+                                               dbuf[x2] = dbuf[x2 + 1] = dr_zpalette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
 #else
-                                               dest[x2] = dest[x2 + 1] =  dr_palette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
+                                               dbuf[x2] = dbuf[x2 + 1] =  dr_palette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
 #endif
                                        }
 #ifdef _X1TURBOZ
                                }
 #endif
+__DECL_VECTORIZED_LOOP
+                               for(int xx = 0; xx < 640; xx++) {
+                                       dest[xx] = dbuf[xx];
+                               }
                        }
                } else {
                        // 80 columns
@@ -915,7 +926,7 @@ void DISPLAY::draw_screen()
                                        for(int x = 0; x < 640; x++) {
                                                uint16_t cg00 = src_cg0[x] | (src_cg0[x] >> 2);
                                                
-                                               dest[x] = get_zpriority(src_text[x], cg00, cg00);
+                                               dbuf[x] = get_zpriority(src_text[x], cg00, cg00);
                                        }
                                } else {
 #endif
@@ -923,29 +934,33 @@ void DISPLAY::draw_screen()
                                        
                                        for(int x = 0; x < 640; x++) {
 #ifdef _X1TURBOZ
-                                               dest[x] = dr_zpalette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
+                                               dbuf[x] = dr_zpalette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
 #else
-                                               dest[x] =  dr_palette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
+                                               dbuf[x] =  dr_palette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
 #endif
                                        }
 #ifdef _X1TURBOZ
                                }
 #endif
+__DECL_VECTORIZED_LOOP
+                               for(int xx = 0; xx < 640; xx++) {
+                                       dest[xx] = dbuf[xx];
+                               }
                        }
                }
                emu->screen_skip_line(false);
        } else {
 #endif
-               emu->set_vm_screen_lines(200);
+//             emu->set_vm_screen_lines(200);
                // 200 lines
-               emu->set_vm_screen_lines(200);
+//             emu->set_vm_screen_lines(200);
                
                if(column40) {
                        // 40 columns
                        for(int y = 0; y < 200; y++) {
+                               uint8_t* src_text = dr_text[y];
                                scrntype_t* dest0 = emu->get_screen_buffer(y * 2 + 0);
                                scrntype_t* dest1 = emu->get_screen_buffer(y * 2 + 1);
-                               uint8_t* src_text = dr_text[y];
 #ifdef _X1TURBOZ
                                if(dr_aen_line[y]) {
                                        uint16_t* src_cg0 = dr_zcg[0][y];
@@ -956,32 +971,40 @@ void DISPLAY::draw_screen()
                                                        uint16_t cg00 = src_cg0[x] | (src_cg0[x] >> 2);
                                                        uint16_t cg11 = src_cg1[x] | (src_cg1[x] >> 2);
                                                        
-                                                       dest0[x2] = dest0[x2 + 1] = get_zpriority(src_text[x], cg00, cg11);
+                                                       dbuf[x2] = dbuf[x2 + 1] = get_zpriority(src_text[x], cg00, cg11);
                                                }
                                        } else {
                                                for(int x = 0, x2 = 0; x < 320; x++, x2 += 2) {
                                                        uint16_t cg01 = src_cg0[x] | (src_cg1[x] >> 2);
                                                        
-                                                       dest0[x2] = dest0[x2 + 1] = get_zpriority(src_text[x], cg01, cg01);
+                                                       dbuf[x2] = dbuf[x2 + 1] = get_zpriority(src_text[x], cg01, cg01);
                                                }
+
                                        }
                                } else {
 #endif
-                                       scrntype_t* dest = emu->get_screen_buffer(y);
+//                                     scrntype_t* dest = emu->get_screen_buffer(y);
                                        uint8_t* src_cg = dr_cg[y];
                                
                                        for(int x = 0, x2 = 0; x < 320; x++, x2 += 2) {
 #ifdef _X1TURBOZ
-                                               dest0[x2] = dest0[x2 + 1] = dr_zpalette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
+                                               dbuf[x2] = dbuf[x2 + 1] = dr_zpalette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
 #else
-                                               dest0[x2] = dest0[x2 + 1] =  dr_palette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
+                                               dbuf[x2] = dbuf[x2 + 1] =  dr_palette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
 #endif
                                        }
 #ifdef _X1TURBOZ
                                }
 #endif
+__DECL_VECTORIZED_LOOP
+                               for(int xx = 0; xx < 640; xx++) {
+                                       dest0[xx] = dbuf[xx];
+                               }
                                if(!config.scan_line) {
-                                       my_memcpy(dest1, dest0, 640 * sizeof(scrntype_t));
+__DECL_VECTORIZED_LOOP
+                                       for(int xx = 0; xx < 640; xx++) {
+                                               dest1[xx] = dbuf[xx];
+                                       }
                                } else {
                                        memset(dest1, 0, 640 * sizeof(scrntype_t));
                                }
@@ -999,27 +1022,34 @@ void DISPLAY::draw_screen()
                                        for(int x = 0; x < 640; x++) {
                                                uint16_t cg00 = src_cg0[x] | (src_cg0[x] >> 2);
                                                
-                                               dest0[x] = get_zpriority(src_text[x], cg00, cg00);
+                                               dbuf[x] = get_zpriority(src_text[x], cg00, cg00);
                                        }
                                } else {
 #endif
                                        uint8_t* src_cg = dr_cg[y];
-                                       
+__DECL_VECTORIZED_LOOP
                                        for(int x = 0; x < 640; x++) {
 #ifdef _X1TURBOZ
-                                               dest0[x] = dr_zpalette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
+                                               dbuf[x] = dr_zpalette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
 #else
-                                               dest0[x] =  dr_palette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
+                                               dbuf[x] =  dr_palette_pc[dr_pri_line[y][src_cg[x]][src_text[x]]];
 #endif
                                        }
+__DECL_VECTORIZED_LOOP
+               for(int xx = 0; xx < 640; xx++) {
+                       dest0[xx] = dbuf[xx];
+               }
+               if(!config.scan_line) {
+__DECL_VECTORIZED_LOOP
+                       for(int xx = 0; xx < 640; xx++) {
+                               dest1[xx] = dbuf[xx];
+                       }
+               } else {
+                       memset(dest1, 0, 640 * sizeof(scrntype_t));
+               }
 #ifdef _X1TURBOZ
                                }
 #endif
-                               if(!config.scan_line) {
-                                       my_memcpy(dest1, dest0, 640 * sizeof(scrntype_t));
-                               } else {
-                                       memset(dest1, 0, 640 * sizeof(scrntype_t));
-                               }
                        }
                }
                emu->screen_skip_line(true);
index f6f790d..690b5d3 100644 (file)
@@ -34,22 +34,22 @@ private:
 #endif
        HD46505 *d_crtc;
        uint8_t* regs;
-       uint8_t vram_t[0x800];
-       uint8_t vram_a[0x800];
+       __DECL_ALIGNED(16) uint8_t vram_t[0x800];
+       __DECL_ALIGNED(16) uint8_t vram_a[0x800];
 #ifdef _X1TURBO_FEATURE
-       uint8_t vram_k[0x800];
+       __DECL_ALIGNED(16) uint8_t vram_k[0x800];
 #endif
        uint8_t* vram_ptr;
-       uint8_t pcg_b[256][8];
-       uint8_t pcg_r[256][8];
-       uint8_t pcg_g[256][8];
+       __DECL_ALIGNED(8) uint8_t pcg_b[256][8];
+       __DECL_ALIGNED(8) uint8_t pcg_r[256][8];
+       __DECL_ALIGNED(8) uint8_t pcg_g[256][8];
 #ifdef _X1TURBO_FEATURE
-       uint8_t gaiji_b[128][16];
-       uint8_t gaiji_r[128][16];
-       uint8_t gaiji_g[128][16];
+       __DECL_ALIGNED(16) uint8_t gaiji_b[128][16];
+       __DECL_ALIGNED(16) uint8_t gaiji_r[128][16];
+       __DECL_ALIGNED(16) uint8_t gaiji_g[128][16];
 #endif
-       uint8_t font[0x800];
-       uint8_t kanji[0x4bc00];
+       __DECL_ALIGNED(16) uint8_t font[0x800];
+       __DECL_ALIGNED(16) uint8_t kanji[0x4bc00];
        
        uint8_t cur_code, cur_line;
        
@@ -57,7 +57,8 @@ private:
        uint8_t* kanji_ptr;
        
        uint8_t pal[3];
-       uint8_t priority, pri[8][8];    // pri[cg][txt]
+       uint8_t priority;
+       __DECL_ALIGNED(32) uint8_t pri[8][8];   // pri[cg][txt]
        uint8_t dr_priority;
        
        bool column40;
@@ -76,41 +77,40 @@ private:
        uint8_t ztpal[8];
        uint8_t dr_zpriority;
 
-       struct {
+       __DECL_ALIGNED(32) struct {
                uint8_t b, r, g;
        } zpal[4096];
        int zpal_num;
 #endif
        
 #ifdef _X1TURBO_FEATURE
-       uint8_t text[400][640];
-       uint8_t cg[400][640];
-       uint8_t pri_line[400][8][8];
+       __DECL_ALIGNED(32) uint8_t text[400][640];
+       __DECL_ALIGNED(32) uint8_t cg[400][640];
+       __DECL_ALIGNED(32) uint8_t pri_line[400][8][8];
 
-       
-       uint8_t dr_text[400][640];
-       uint8_t dr_cg[400][640];
-       uint8_t dr_pri_line[400][8][8];
+       __DECL_ALIGNED(32) uint8_t dr_text[400][640];
+       __DECL_ALIGNED(32) uint8_t dr_cg[400][640];
+       __DECL_ALIGNED(32) uint8_t dr_pri_line[400][8][8];
 #else
-       uint8_t text[200][640+8];
-       uint8_t cg[200][640];
-       uint8_t pri_line[200][8][8];
+       __DECL_ALIGNED(32) uint8_t text[200][640+8];
+       __DECL_ALIGNED(32) uint8_t cg[200][640];
+       __DECL_ALIGNED(32) uint8_t pri_line[200][8][8];
 
-       uint8_t dr_text[200][640+8];
-       uint8_t dr_cg[200][640];
-       uint8_t dr_pri_line[200][8][8];
+       __DECL_ALIGNED(32) uint8_t dr_text[200][640+8];
+       __DECL_ALIGNED(32) uint8_t dr_cg[200][640];
+       __DECL_ALIGNED(32) uint8_t dr_pri_line[200][8][8];
 #endif
 #ifdef _X1TURBOZ
-       uint16_t zcg[2][400][640];
-       bool aen_line[400];
-       scrntype_t zpalette_pc[8+8+4096];       // 0-7:text, 8-15:cg, 16-:4096cg
+       __DECL_ALIGNED(32) uint16_t zcg[2][400][640];
+       __DECL_ALIGNED(16) bool aen_line[400];
+       __DECL_ALIGNED(32) scrntype_t zpalette_pc[8+8+4096];    // 0-7:text, 8-15:cg, 16-:4096cg
 
-       uint16_t dr_zcg[2][400][640];
-       bool dr_aen_line[400];
-       scrntype_t dr_zpalette_pc[8+8+4096];    // 0-7:text, 8-15:cg, 16-:4096cg
+       __DECL_ALIGNED(32) uint16_t dr_zcg[2][400][640];
+       __DECL_ALIGNED(16) bool dr_aen_line[400];
+       __DECL_ALIGNED(32) scrntype_t dr_zpalette_pc[8+8+4096]; // 0-7:text, 8-15:cg, 16-:4096cg
 #endif
-       scrntype_t palette_pc[8+8];             // 0-7:text, 8-15:cg
-       scrntype_t dr_palette_pc[8+8];          // 0-7:text, 8-15:cg
+       __DECL_ALIGNED(16) scrntype_t palette_pc[8+8];          // 0-7:text, 8-15:cg
+       __DECL_ALIGNED(16) scrntype_t dr_palette_pc[8+8];               // 0-7:text, 8-15:cg
        bool prev_vert_double;
        int raster, cblink;
        
@@ -150,9 +150,9 @@ private:
 
        int tmp_kanji_ptr;
 
-       _bit_trans_table_t bit_trans_table_b0;
-       _bit_trans_table_t bit_trans_table_r0;
-       _bit_trans_table_t bit_trans_table_g0;
+       __DECL_ALIGNED(16) _bit_trans_table_t bit_trans_table_b0;
+       __DECL_ALIGNED(16) _bit_trans_table_t bit_trans_table_r0;
+       __DECL_ALIGNED(16) _bit_trans_table_t bit_trans_table_g0;
 public:
        DISPLAY(VM_TEMPLATE* parent_vm, EMU_TEMPLATE* parent_emu) : DEVICE(parent_vm, parent_emu)
        {