OSDN Git Service

[COMMON] Fix unaligned SIMD variables. Fix crash when built with "-msse2" on Win32.
author K.Ohta <whatisthis.sowhat@gmail.com>
Thu, 17 Jan 2019 10:40:18 +0000 (19:40 +0900)
committer K.Ohta <whatisthis.sowhat@gmail.com>
Thu, 17 Jan 2019 10:40:18 +0000 (19:40 +0900)
[BUILD][Win32] Adjust optimize parameter for MinGW/Win32.

source/build-cmake/params/buildvars_mingw_params_gcc.dat
source/src/common.cpp
source/src/common.h
source/src/vm/fm7/display.cpp
source/src/vm/fm7/vram.cpp
source/src/vm/z80tvgame/memory.cpp
source/src/vm/z80tvgame/memory.h

index fbc814e..e12a757 100644 (file)
@@ -29,14 +29,14 @@ case ${CSP_DEBUG} in
      MAKEFLAGS_BASE2="-ggdb ${ARCH_FLAGS} ${MAKEFLAGS_BASE} ${ADDITIONAL_CFLAGS} -DNDEBUG"
      ;;
    "No" | "no" | "NO" | * )
-     MAKEFLAGS_BASE2="${MAKEFLAGS_BASE} -O3 \
+     MAKEFLAGS_BASE2="${MAKEFLAGS_BASE} -O2 \
                 ${ARCH_FLAGS} \
-                -ftree-vectorize \
-               -ftree-loop-optimize \
-               -floop-nest-optimize \
                -std=c++11 \
                ${ADDITIONAL_CFLAGS} \
                -DNDEBUG "
+#                -ftree-vectorize \
+#              -ftree-loop-optimize \
+#              -floop-nest-optimize \
      ;;
 esac
 
index beac0ce..9a10c0a 100644 (file)
@@ -537,10 +537,8 @@ uint8_t DLL_PREFIX A_OF_COLOR(scrntype_t c)
 void DLL_PREFIX PrepareBitTransTableUint16(_bit_trans_table_t *tbl, uint16_t on_val, uint16_t off_val)
 {
        if(tbl == NULL) return;
-__DECL_VECTORIZED_LOOP
        for(uint16_t i = 0; i < 256; i++) {
                uint16_t n = i;
-__DECL_VECTORIZED_LOOP
                for(int j = 0; j < 8; j++) {
                        tbl->plane_table[i].w[j] = ((n & 0x80) == 0) ? off_val : on_val;
                        n <<= 1;
@@ -554,10 +552,8 @@ __DECL_VECTORIZED_LOOP
 void DLL_PREFIX PrepareBitTransTableScrnType(_bit_trans_table_scrn_t *tbl, scrntype_t on_val, scrntype_t off_val)
 {
        if(tbl == NULL) return;
-__DECL_VECTORIZED_LOOP
        for(uint16_t i = 0; i < 256; i++) {
                uint16_t n = i;
-__DECL_VECTORIZED_LOOP
                for(int j = 0; j < 8; j++) {
                        tbl->plane_table[i].w[j] = ((n & 0x80) == 0) ? off_val : on_val;
                        n <<= 1;
@@ -569,10 +565,8 @@ __DECL_VECTORIZED_LOOP
 void DLL_PREFIX PrepareReverseBitTransTableUint16(_bit_trans_table_t *tbl, uint16_t on_val, uint16_t off_val)
 {
        if(tbl == NULL) return;
-__DECL_VECTORIZED_LOOP
        for(uint16_t i = 0; i < 256; i++) {
                uint16_t n = i;
-__DECL_VECTORIZED_LOOP
                for(int j = 0; j < 8; j++) {
                        tbl->plane_table[i].w[j] = ((n & 0x01) == 0) ? off_val : on_val;
                        n >>= 1;
@@ -583,10 +577,8 @@ __DECL_VECTORIZED_LOOP
 void DLL_PREFIX PrepareReverseBitTransTableScrnType(_bit_trans_table_scrn_t *tbl, scrntype_t on_val, scrntype_t off_val)
 {
        if(tbl == NULL) return;
-__DECL_VECTORIZED_LOOP
        for(uint16_t i = 0; i < 256; i++) {
                uint16_t n = i;
-__DECL_VECTORIZED_LOOP
                for(int j = 0; j < 8; j++) {
                        tbl->plane_table[i].w[j] = ((n & 0x01) == 0) ? off_val : on_val;
                        n >>= 1;
@@ -598,9 +590,9 @@ __DECL_VECTORIZED_LOOP
 void DLL_PREFIX ConvertByteToPackedPixelByColorTable2(uint8_t *src, scrntype_t* dst, int bytes, _bit_trans_table_scrn_t *tbl, scrntype_t *on_color_table, scrntype_t* off_color_table)
 {
        
-    scrntype_vec8_t tmpd;
-       scrntype_vec8_t tmpdd;
-       scrntype_vec8_t colors;
+    __DECL_ALIGNED(32) scrntype_vec8_t tmpd;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmpdd;
+       __DECL_ALIGNED(32) scrntype_vec8_t colors;
        scrntype_vec8_t* vt = (scrntype_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(scrntype_vec8_t));
        
        uintptr_t disalign = (uintptr_t)dst;
@@ -664,10 +656,10 @@ __DECL_VECTORIZED_LOOP
 void DLL_PREFIX ConvertByteToSparceUint16(uint8_t *src, uint16_t* dst, int bytes, _bit_trans_table_t *tbl, uint16_t mask)
 {
        
-       uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
        uint16_vec8_t*  vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
 
-       uint16_vec8_t __masks;
+       __DECL_ALIGNED(16) uint16_vec8_t __masks;
 
 __DECL_VECTORIZED_LOOP
        for(int i = 0; i < 8; i++) {
@@ -706,11 +698,11 @@ __DECL_VECTORIZED_LOOP
 void DLL_PREFIX ConvertByteToSparceUint8(uint8_t *src, uint16_t* dst, int bytes, _bit_trans_table_t *tbl, uint16_t mask)
 {
        
-       uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
        uint16_vec8_t*  vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
 
-       uint16_vec8_t __masks;
-       uint8_vec8_t tmpdd;
+       __DECL_ALIGNED(16) uint16_vec8_t __masks;
+       __DECL_ALIGNED(16) uint8_vec8_t tmpdd;
 
 __DECL_VECTORIZED_LOOP
        for(int i = 0; i < 8; i++) {
@@ -751,8 +743,8 @@ __DECL_VECTORIZED_LOOP
 void DLL_PREFIX ConvertByteToPackedPixelByColorTable(uint8_t *src, scrntype_t* dst, int bytes, _bit_trans_table_t *tbl, scrntype_t *on_color_table, scrntype_t* off_color_table)
 {
        
-       uint16_vec8_t   tmpd;
-       scrntype_vec8_t tmpdd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmpdd;
        uint16_vec8_t*  vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
        
        uintptr_t disalign = (uintptr_t)dst;
@@ -831,8 +823,8 @@ __DECL_VECTORIZED_LOOP
        uint8_t r, g, b;
        int shift = src->shift;
        const bool is_render[3] = { src->is_render[0], src->is_render[1],  src->is_render[2] };
-       uint16_vec8_t tmpd;
-       scrntype_vec8_t tmp_dd; 
+       __DECL_ALIGNED(16) uint16_vec8_t tmpd;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; 
        scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
        
        x = src->begin_pos;
@@ -860,7 +852,7 @@ __DECL_VECTORIZED_LOOP
 #else // 24bit
                static const int shift_factor = 3;
 #endif
-               scrntype_vec8_t sline;
+               __DECL_ALIGNED(32) scrntype_vec8_t sline;
                scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t));
        __DECL_VECTORIZED_LOOP
                for(int i = 0; i < 8; i++) {
@@ -935,8 +927,8 @@ __DECL_VECTORIZED_LOOP
        uint8_t r, g, b, n;
        int shift = src->shift;
        const bool is_render[4] = { src->is_render[0], src->is_render[1],  src->is_render[2], src->is_render[3] };
-       uint16_vec8_t tmpd;
-       scrntype_vec8_t tmp_dd; 
+       __DECL_ALIGNED(16) uint16_vec8_t tmpd;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; 
        scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
        
        x = src->begin_pos;
@@ -966,7 +958,7 @@ __DECL_VECTORIZED_LOOP
 #else // 24bit
                static const int shift_factor = 3;
 #endif
-               scrntype_vec8_t sline;
+               __DECL_ALIGNED(32) scrntype_vec8_t sline;
                scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t));
        __DECL_VECTORIZED_LOOP
                for(int i = 0; i < 8; i++) {
@@ -1035,8 +1027,8 @@ __DECL_VECTORIZED_LOOP
        uint8_t d[16];
        int shift = src->shift;
        const bool is_render[4] = { src->is_render[0], src->is_render[1],  src->is_render[2], src->is_render[3] };
-       uint16_vec8_t tmpd;
-       scrntype_vec8_t tmp_dd; 
+       __DECL_ALIGNED(16) uint16_vec8_t tmpd;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; 
        scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
        
        x = src->begin_pos;
@@ -1065,7 +1057,7 @@ __DECL_VECTORIZED_LOOP
 #else // 24bit
                static const int shift_factor = 3;
 #endif
-               scrntype_vec8_t sline;
+               __DECL_ALIGNED(32) scrntype_vec8_t sline;
                scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t));
        __DECL_VECTORIZED_LOOP
                for(int i = 0; i < 8; i++) {
@@ -1104,7 +1096,7 @@ void DLL_PREFIX Convert2NColorsToByte_Line(_render_command_data_t *src, uint8_t
 
        uint8_t* srcp[8];
        __DECL_ALIGNED(32) uint32_t offset[8] = {0};
-       uint16_vec8_t dat;
+       __DECL_ALIGNED(16) uint16_vec8_t dat;
        uint16_vec8_t* bp[8] ;
                
 __DECL_VECTORIZED_LOOP
@@ -1152,7 +1144,7 @@ void DLL_PREFIX Convert2NColorsToByte_LineZoom2(_render_command_data_t *src, uin
 
        uint8_t* srcp[8];
        __DECL_ALIGNED(32) uint32_t offset[8] = {0};
-       uint16_vec8_t dat;
+       __DECL_ALIGNED(16) uint16_vec8_t dat;
        uint16_vec8_t* bp[8] ;
                
 __DECL_VECTORIZED_LOOP
@@ -1200,10 +1192,10 @@ void DLL_PREFIX Convert8ColorsToByte_Line(_render_command_data_t *src, uint8_t *
        uint8_t *gp = &(src->data[2][src->baseaddress[2]]);
        __DECL_ALIGNED(16) uint32_t offset[4] = {0};
 
-       uint16_vec8_t rdat;
-       uint16_vec8_t gdat;
-       uint16_vec8_t bdat;
-       uint16_vec8_t tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t rdat;
+       __DECL_ALIGNED(16) uint16_vec8_t gdat;
+       __DECL_ALIGNED(16) uint16_vec8_t bdat;
+       __DECL_ALIGNED(16) uint16_vec8_t tmpd;
 
        uint16_vec8_t* bpb = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[0]->plane_table[0]), sizeof(uint16_vec8_t));
        uint16_vec8_t* bpr = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[1]->plane_table[0]), sizeof(uint16_vec8_t));
index 10b65b7..624eb68 100644 (file)
@@ -1082,8 +1082,8 @@ typedef struct {
 
 inline scrntype_vec8_t ConvertByteToMonochromePackedPixel(uint8_t src, _bit_trans_table_t *tbl,scrntype_t on_val, scrntype_t off_val)
 {
-       uint16_vec8_t   tmpd;
-       scrntype_vec8_t tmpdd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmpdd;
        _bit_trans_table_t*  vt = (_bit_trans_table_t*)__builtin_assume_aligned(tbl, sizeof(uint16_vec8_t));
 
        tmpd.v = vt->plane_table[src].v;
@@ -1104,7 +1104,7 @@ void DLL_PREFIX ConvertByteToSparceUint8(uint8_t *src, uint16_t* dst, int bytes,
 // Table must be (ON_VAL_COLOR : OFF_VAL_COLOR)[256].
 inline scrntype_vec8_t ConvertByteToPackedPixel_PixelTbl(uint8_t src, _bit_trans_table_scrn_t *tbl)
 {
-       scrntype_vec8_t tmpdd;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmpdd;
        _bit_trans_table_scrn_t*  vt = (_bit_trans_table_scrn_t*)__builtin_assume_aligned(tbl, sizeof(uint16_vec8_t));
 
        tmpdd.v = vt->plane_table[src].v;
@@ -1114,8 +1114,8 @@ inline scrntype_vec8_t ConvertByteToPackedPixel_PixelTbl(uint8_t src, _bit_trans
 // Table must be (ON_VAL_COLOR : OFF_VAL_COLOR)[256].
 inline scrntype_vec16_t ConvertByteToDoublePackedPixel_PixelTbl(uint8_t src, _bit_trans_table_scrn_t *tbl)
 {
-       scrntype_vec16_t tmpdd;
-       scrntype_vec8_t tmpd;
+       __DECL_ALIGNED(32) scrntype_vec16_t tmpdd;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmpd;
        _bit_trans_table_scrn_t*  vt = (_bit_trans_table_scrn_t*)__builtin_assume_aligned(tbl, sizeof(uint16_vec8_t));
        tmpd.v = vt->plane_table[src].v;
        int j = 0;
@@ -1131,7 +1131,7 @@ __DECL_VECTORIZED_LOOP
 // Table must be initialize ON_COLOR : OFF_COLOR
 inline void ConvertByteToDoubleMonochromeUint8(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl)
 {
-       uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
        uint16_vec8_t*  vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
 
        __DECL_ALIGNED(16) uint8_t d[16];
@@ -1151,7 +1151,7 @@ __DECL_VECTORIZED_LOOP
 
 inline void ConvertByteToMonochromeUint8(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl)
 {
-       uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
        uint16_vec8_t*  vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
 
        tmpd = vt[src];
@@ -1163,7 +1163,7 @@ __DECL_VECTORIZED_LOOP
 
 inline void ConvertRGBTo8ColorsUint8(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift)
 {
-       uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
        uint16_vec8_t*  rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t));
        uint16_vec8_t*  gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t));
        uint16_vec8_t*  bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t));
@@ -1180,7 +1180,7 @@ __DECL_VECTORIZED_LOOP
 
 inline void ConvertRGBTo8ColorsUint8_Zoom2Left(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift)
 {
-       uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
        uint16_vec8_t*  rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t));
        uint16_vec8_t*  gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t));
        uint16_vec8_t*  bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t));
@@ -1198,7 +1198,7 @@ __DECL_VECTORIZED_LOOP
 
 inline void ConvertRGBTo8ColorsUint8_Zoom2Right(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift)
 {
-       uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
        uint16_vec8_t*  rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t));
        uint16_vec8_t*  gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t));
        uint16_vec8_t*  bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t));
@@ -1216,7 +1216,7 @@ __DECL_VECTORIZED_LOOP
 
 inline void ConvertRGBTo8ColorsUint8_Zoom2Double(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift)
 {
-       uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
        uint16_vec8_t*  rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t));
        uint16_vec8_t*  gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t));
        uint16_vec8_t*  bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t));
@@ -1234,7 +1234,7 @@ __DECL_VECTORIZED_LOOP
 
 inline void ConvertByteToMonochromeUint8Cond_Zoom2(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl, uint8_t on_color, uint8_t off_color)
 {
-       uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
        uint16_vec8_t*  vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
 
        __DECL_ALIGNED(16) uint8_t d[16];
@@ -1254,7 +1254,7 @@ __DECL_VECTORIZED_LOOP
 
 inline void ConvertByteToMonochromeUint8Cond(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl, uint8_t on_color, uint8_t off_color)
 {
-       uint16_vec8_t   tmpd;
+       __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
        uint16_vec8_t*  vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
 
        tmpd = vt[src];
index 67fb752..f2b36c1 100644 (file)
@@ -42,31 +42,6 @@ DISPLAY::DISPLAY(VM_TEMPLATE* parent_vm, EMU* parent_emu) : DEVICE(parent_vm, pa
        mainio = NULL;
        subcpu = NULL;
        keyboard = NULL;
-#if 1
-       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_0[0][0])), 0x0080, 0x0000);
-       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_1[0][0])), 0x0040, 0x0000);
-       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_2[0][0])), 0x0020, 0x0000);
-       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_3[0][0])), 0x0010, 0x0000);
-#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
-       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_4[0][0])), 0x0008, 0x0000);
-       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_5[0][0])), 0x0004, 0x0000);
-#endif 
-#else
-       for(int i = 0; i < 256; i++) {
-               uint16_t n = (uint16_t)i;
-               for(int j = 0; j < 8; j++) {
-                       bit_trans_table_0[i][j] = n & 0x80;
-                       bit_trans_table_1[i][j] = ((n & 0x80) != 0) ? 0x40 : 0;
-                       bit_trans_table_2[i][j] = ((n & 0x80) != 0) ? 0x20 : 0;
-                       bit_trans_table_3[i][j] = ((n & 0x80) != 0) ? 0x10 : 0;
-#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
-                       bit_trans_table_4[i][j] = ((n & 0x80) != 0) ? 0x08 : 0;
-                       bit_trans_table_5[i][j] = ((n & 0x80) != 0) ? 0x04 : 0;
-#endif                 
-                       n <<= 1;
-               }
-       }
-#endif
        displine = 0;
        active_page = 0;
 #if defined(USE_GREEN_DISPLAY)
@@ -3365,6 +3340,31 @@ void DISPLAY::initialize()
 {
        int i;
 
+#if 1
+       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_0[0][0])), 0x0080, 0x0000);
+       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_1[0][0])), 0x0040, 0x0000);
+       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_2[0][0])), 0x0020, 0x0000);
+       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_3[0][0])), 0x0010, 0x0000);
+#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
+       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_4[0][0])), 0x0008, 0x0000);
+       PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_5[0][0])), 0x0004, 0x0000);
+#endif 
+#else
+       for(int i = 0; i < 256; i++) {
+               uint16_t n = (uint16_t)i;
+               for(int j = 0; j < 8; j++) {
+                       bit_trans_table_0[i][j] = n & 0x80;
+                       bit_trans_table_1[i][j] = ((n & 0x80) != 0) ? 0x40 : 0;
+                       bit_trans_table_2[i][j] = ((n & 0x80) != 0) ? 0x20 : 0;
+                       bit_trans_table_3[i][j] = ((n & 0x80) != 0) ? 0x10 : 0;
+#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX)
+                       bit_trans_table_4[i][j] = ((n & 0x80) != 0) ? 0x08 : 0;
+                       bit_trans_table_5[i][j] = ((n & 0x80) != 0) ? 0x04 : 0;
+#endif                 
+                       n <<= 1;
+               }
+       }
+#endif
        memset(io_w_latch, 0xff, sizeof(io_w_latch));
        screen_update_flag = true;
        memset(gvram, 0x00, sizeof(gvram));
index 9b804e1..95e98cc 100644 (file)
@@ -671,8 +671,8 @@ void DISPLAY::CopyDrawnData(scrntype_t* src, scrntype_t* dst, int width, bool sc
 #endif
        scrntype_vec8_t* vsrc = (scrntype_vec8_t*)__builtin_assume_aligned(src, sizeof(scrntype_vec8_t));
        scrntype_vec8_t* vdst = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
-       scrntype_vec8_t tmp_dd;
-       scrntype_vec8_t sline;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd;
+       __DECL_ALIGNED(32) scrntype_vec8_t sline;
        
        if(scan_line) {
 __DECL_VECTORIZED_LOOP
@@ -747,7 +747,7 @@ void DISPLAY::GETVRAM_1_400L(int yoff, scrntype_t *p)
        pixel = gvram_shadow[yoff_d];
        uint16_vec8_t *ppx = (uint16_vec8_t *)__builtin_assume_aligned(&(bit_trans_table_0[pixel][0]), 16);
        __DECL_ALIGNED(16) uint16_vec8_t tmp_d;
-       scrntype_vec8_t tmp_dd;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd;
        scrntype_vec8_t *vp = (scrntype_vec8_t *)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t));
 
        tmp_d.v = ppx->v;
@@ -770,7 +770,7 @@ void DISPLAY::GETVRAM_1_400L_GREEN(int yoff, scrntype_t *p)
        pixel = gvram_shadow[yoff_d];
        uint16_vec8_t *ppx = (uint16_vec8_t *)__builtin_assume_aligned(&(bit_trans_table_0[pixel][0]), 16);
        __DECL_ALIGNED(16) uint16_vec8_t tmp_d;
-       scrntype_vec8_t tmp_dd;
+       __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd;
        scrntype_vec8_t *vp = (scrntype_vec8_t *)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t));
 
        tmp_d.v = ppx->v;
@@ -794,7 +794,7 @@ void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px,
 {
        uint32_t b3, r3, g3;
        uint8_t  bb[4], rr[4], gg[4];
-       uint16_vec8_t pixels;
+       __DECL_ALIGNED(16) uint16_vec8_t pixels;
        __DECL_ALIGNED(16) const uint16_t __masks[8] = {(uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask};
        scrntype_t b, r, g;
        uint32_t idx;;
@@ -841,7 +841,7 @@ void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px,
 #else
        __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[16];
 #endif
-       uint16_vec8_t tmp_g, tmp_r, tmp_b;
+       __DECL_ALIGNED(16) uint16_vec8_t tmp_g, tmp_r, tmp_b;
        __v8hi *vp0, *vp1, *vp2, *vp3;
        // G
        vp0 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_0[gg[0]][0]), 16);
@@ -895,7 +895,7 @@ __DECL_VECTORIZED_LOOP
                tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = analog_palette_pixel[pixels.w[i]];;
        }
        scrntype_vec8_t *vpx = (scrntype_vec8_t*)__builtin_assume_aligned(px, sizeof(scrntype_vec8_t));
-       scrntype_vec8_t vmask;
+       __DECL_ALIGNED(32) scrntype_vec8_t vmask;
 __DECL_VECTORIZED_LOOP
        for(int i = 0; i < 2; i++) {
                vp[i].v = dp[i].v;
@@ -957,9 +957,9 @@ void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, scrntype_t *px, bool scan_li
 
        uint8_t  bb[8], rr[8], gg[8];
 
-       uint16_vec8_t _btmp;
-       uint16_vec8_t _rtmp;
-       uint16_vec8_t _gtmp;
+       __DECL_ALIGNED(16) uint16_vec8_t _btmp;
+       __DECL_ALIGNED(16) uint16_vec8_t _rtmp;
+       __DECL_ALIGNED(16) uint16_vec8_t _gtmp;
        uint16_vec8_t *vp0, *vp1, *vp2, *vp3, *vp4, *vp5;
 #if !defined(FIXED_FRAMEBUFFER_SIZE)
        __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[8];
@@ -1088,7 +1088,7 @@ __DECL_VECTORIZED_LOOP
                        dp[i].v = dp[i].v >> 2;
 #endif
                }
-               scrntype_vec8_t scanline_data;
+               __DECL_ALIGNED(32) scrntype_vec8_t scanline_data;
 __DECL_VECTORIZED_LOOP
                for(int i = 0; i < 8; i++) {
                        scanline_data.w[i] = RGBA_COLOR(31, 31, 31, 255);
index e23d257..62054f1 100644 (file)
@@ -99,7 +99,7 @@ void MEMORY::draw_screen()
                        dest[x] = (val & bit) ? col_w : col_b;
                }
 #else
-               scrntype_vec8_t d;
+               __DECL_ALIGNED(32) scrntype_vec8_t d;
                for(int xx = 32; xx < (240 - 32); xx += 8) {
                        uint8_t val = ram[offset + (xx >> 3)];
                        d = ConvertByteToPackedPixel_PixelTbl(val, &pixel_trans_table);
index 5d00c14..1947346 100644 (file)
@@ -31,7 +31,7 @@ private:
        
        bool inserted;
 
-       _bit_trans_table_scrn_t pixel_trans_table;
+       __DECL_ALIGNED(32) _bit_trans_table_scrn_t pixel_trans_table;
 public:
        MEMORY(VM_TEMPLATE* parent_vm, EMU* parent_emu) : DEVICE(parent_vm, parent_emu)
        {