From: K.Ohta Date: Thu, 17 Jan 2019 10:40:18 +0000 (+0900) Subject: [COMMON] Fix unaligned SIMD variables. Fix crash when built with "-msse2" on Win32. X-Git-Url: http://git.osdn.net/view?p=csp-qt%2Fcommon_source_project-fm7.git;a=commitdiff_plain;h=825bf2ca6d1fdd3b866e72eaf7e6db32a4a704c4 [COMMON] Fix unaligned SIMD variables. Fix crash when built with "-msse2" on Win32. [BUILD][Win32] Adjust optimization parameters for MinGW/Win32. --- diff --git a/source/build-cmake/params/buildvars_mingw_params_gcc.dat b/source/build-cmake/params/buildvars_mingw_params_gcc.dat index fbc814e8f..e12a7578b 100644 --- a/source/build-cmake/params/buildvars_mingw_params_gcc.dat +++ b/source/build-cmake/params/buildvars_mingw_params_gcc.dat @@ -29,14 +29,14 @@ case ${CSP_DEBUG} in MAKEFLAGS_BASE2="-ggdb ${ARCH_FLAGS} ${MAKEFLAGS_BASE} ${ADDITIONAL_CFLAGS} -DNDEBUG" ;; "No" | "no" | "NO" | * ) - MAKEFLAGS_BASE2="${MAKEFLAGS_BASE} -O3 \ + MAKEFLAGS_BASE2="${MAKEFLAGS_BASE} -O2 \ ${ARCH_FLAGS} \ - -ftree-vectorize \ - -ftree-loop-optimize \ - -floop-nest-optimize \ -std=c++11 \ ${ADDITIONAL_CFLAGS} \ -DNDEBUG " +# -ftree-vectorize \ +# -ftree-loop-optimize \ +# -floop-nest-optimize \ ;; esac diff --git a/source/src/common.cpp b/source/src/common.cpp index beac0ce7a..9a10c0acb 100644 --- a/source/src/common.cpp +++ b/source/src/common.cpp @@ -537,10 +537,8 @@ uint8_t DLL_PREFIX A_OF_COLOR(scrntype_t c) void DLL_PREFIX PrepareBitTransTableUint16(_bit_trans_table_t *tbl, uint16_t on_val, uint16_t off_val) { if(tbl == NULL) return; -__DECL_VECTORIZED_LOOP for(uint16_t i = 0; i < 256; i++) { uint16_t n = i; -__DECL_VECTORIZED_LOOP for(int j = 0; j < 8; j++) { tbl->plane_table[i].w[j] = ((n & 0x80) == 0) ? 
off_val : on_val; n <<= 1; @@ -554,10 +552,8 @@ __DECL_VECTORIZED_LOOP void DLL_PREFIX PrepareBitTransTableScrnType(_bit_trans_table_scrn_t *tbl, scrntype_t on_val, scrntype_t off_val) { if(tbl == NULL) return; -__DECL_VECTORIZED_LOOP for(uint16_t i = 0; i < 256; i++) { uint16_t n = i; -__DECL_VECTORIZED_LOOP for(int j = 0; j < 8; j++) { tbl->plane_table[i].w[j] = ((n & 0x80) == 0) ? off_val : on_val; n <<= 1; @@ -569,10 +565,8 @@ __DECL_VECTORIZED_LOOP void DLL_PREFIX PrepareReverseBitTransTableUint16(_bit_trans_table_t *tbl, uint16_t on_val, uint16_t off_val) { if(tbl == NULL) return; -__DECL_VECTORIZED_LOOP for(uint16_t i = 0; i < 256; i++) { uint16_t n = i; -__DECL_VECTORIZED_LOOP for(int j = 0; j < 8; j++) { tbl->plane_table[i].w[j] = ((n & 0x01) == 0) ? off_val : on_val; n >>= 1; @@ -583,10 +577,8 @@ __DECL_VECTORIZED_LOOP void DLL_PREFIX PrepareReverseBitTransTableScrnType(_bit_trans_table_scrn_t *tbl, scrntype_t on_val, scrntype_t off_val) { if(tbl == NULL) return; -__DECL_VECTORIZED_LOOP for(uint16_t i = 0; i < 256; i++) { uint16_t n = i; -__DECL_VECTORIZED_LOOP for(int j = 0; j < 8; j++) { tbl->plane_table[i].w[j] = ((n & 0x01) == 0) ? 
off_val : on_val; n >>= 1; @@ -598,9 +590,9 @@ __DECL_VECTORIZED_LOOP void DLL_PREFIX ConvertByteToPackedPixelByColorTable2(uint8_t *src, scrntype_t* dst, int bytes, _bit_trans_table_scrn_t *tbl, scrntype_t *on_color_table, scrntype_t* off_color_table) { - scrntype_vec8_t tmpd; - scrntype_vec8_t tmpdd; - scrntype_vec8_t colors; + __DECL_ALIGNED(32) scrntype_vec8_t tmpd; + __DECL_ALIGNED(32) scrntype_vec8_t tmpdd; + __DECL_ALIGNED(32) scrntype_vec8_t colors; scrntype_vec8_t* vt = (scrntype_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(scrntype_vec8_t)); uintptr_t disalign = (uintptr_t)dst; @@ -664,10 +656,10 @@ __DECL_VECTORIZED_LOOP void DLL_PREFIX ConvertByteToSparceUint16(uint8_t *src, uint16_t* dst, int bytes, _bit_trans_table_t *tbl, uint16_t mask) { - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t)); - uint16_vec8_t __masks; + __DECL_ALIGNED(16) uint16_vec8_t __masks; __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { @@ -706,11 +698,11 @@ __DECL_VECTORIZED_LOOP void DLL_PREFIX ConvertByteToSparceUint8(uint8_t *src, uint16_t* dst, int bytes, _bit_trans_table_t *tbl, uint16_t mask) { - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t)); - uint16_vec8_t __masks; - uint8_vec8_t tmpdd; + __DECL_ALIGNED(16) uint16_vec8_t __masks; + __DECL_ALIGNED(16) uint8_vec8_t tmpdd; __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { @@ -751,8 +743,8 @@ __DECL_VECTORIZED_LOOP void DLL_PREFIX ConvertByteToPackedPixelByColorTable(uint8_t *src, scrntype_t* dst, int bytes, _bit_trans_table_t *tbl, scrntype_t *on_color_table, scrntype_t* off_color_table) { - uint16_vec8_t tmpd; - scrntype_vec8_t tmpdd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; + __DECL_ALIGNED(32) scrntype_vec8_t tmpdd; uint16_vec8_t* vt = 
(uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t)); uintptr_t disalign = (uintptr_t)dst; @@ -831,8 +823,8 @@ __DECL_VECTORIZED_LOOP uint8_t r, g, b; int shift = src->shift; const bool is_render[3] = { src->is_render[0], src->is_render[1], src->is_render[2] }; - uint16_vec8_t tmpd; - scrntype_vec8_t tmp_dd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; + __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t)); x = src->begin_pos; @@ -860,7 +852,7 @@ __DECL_VECTORIZED_LOOP #else // 24bit static const int shift_factor = 3; #endif - scrntype_vec8_t sline; + __DECL_ALIGNED(32) scrntype_vec8_t sline; scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t)); __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { @@ -935,8 +927,8 @@ __DECL_VECTORIZED_LOOP uint8_t r, g, b, n; int shift = src->shift; const bool is_render[4] = { src->is_render[0], src->is_render[1], src->is_render[2], src->is_render[3] }; - uint16_vec8_t tmpd; - scrntype_vec8_t tmp_dd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; + __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t)); x = src->begin_pos; @@ -966,7 +958,7 @@ __DECL_VECTORIZED_LOOP #else // 24bit static const int shift_factor = 3; #endif - scrntype_vec8_t sline; + __DECL_ALIGNED(32) scrntype_vec8_t sline; scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t)); __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { @@ -1035,8 +1027,8 @@ __DECL_VECTORIZED_LOOP uint8_t d[16]; int shift = src->shift; const bool is_render[4] = { src->is_render[0], src->is_render[1], src->is_render[2], src->is_render[3] }; - uint16_vec8_t tmpd; - scrntype_vec8_t tmp_dd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; + __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; scrntype_vec8_t* vdp = 
(scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t)); x = src->begin_pos; @@ -1065,7 +1057,7 @@ __DECL_VECTORIZED_LOOP #else // 24bit static const int shift_factor = 3; #endif - scrntype_vec8_t sline; + __DECL_ALIGNED(32) scrntype_vec8_t sline; scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t)); __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { @@ -1104,7 +1096,7 @@ void DLL_PREFIX Convert2NColorsToByte_Line(_render_command_data_t *src, uint8_t uint8_t* srcp[8]; __DECL_ALIGNED(32) uint32_t offset[8] = {0}; - uint16_vec8_t dat; + __DECL_ALIGNED(16) uint16_vec8_t dat; uint16_vec8_t* bp[8] ; __DECL_VECTORIZED_LOOP @@ -1152,7 +1144,7 @@ void DLL_PREFIX Convert2NColorsToByte_LineZoom2(_render_command_data_t *src, uin uint8_t* srcp[8]; __DECL_ALIGNED(32) uint32_t offset[8] = {0}; - uint16_vec8_t dat; + __DECL_ALIGNED(16) uint16_vec8_t dat; uint16_vec8_t* bp[8] ; __DECL_VECTORIZED_LOOP @@ -1200,10 +1192,10 @@ void DLL_PREFIX Convert8ColorsToByte_Line(_render_command_data_t *src, uint8_t * uint8_t *gp = &(src->data[2][src->baseaddress[2]]); __DECL_ALIGNED(16) uint32_t offset[4] = {0}; - uint16_vec8_t rdat; - uint16_vec8_t gdat; - uint16_vec8_t bdat; - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t rdat; + __DECL_ALIGNED(16) uint16_vec8_t gdat; + __DECL_ALIGNED(16) uint16_vec8_t bdat; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* bpb = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[0]->plane_table[0]), sizeof(uint16_vec8_t)); uint16_vec8_t* bpr = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[1]->plane_table[0]), sizeof(uint16_vec8_t)); diff --git a/source/src/common.h b/source/src/common.h index 10b65b702..624eb68dc 100644 --- a/source/src/common.h +++ b/source/src/common.h @@ -1082,8 +1082,8 @@ typedef struct { inline scrntype_vec8_t ConvertByteToMonochromePackedPixel(uint8_t src, _bit_trans_table_t *tbl,scrntype_t on_val, scrntype_t off_val) { - 
uint16_vec8_t tmpd; - scrntype_vec8_t tmpdd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; + __DECL_ALIGNED(32) scrntype_vec8_t tmpdd; _bit_trans_table_t* vt = (_bit_trans_table_t*)__builtin_assume_aligned(tbl, sizeof(uint16_vec8_t)); tmpd.v = vt->plane_table[src].v; @@ -1104,7 +1104,7 @@ void DLL_PREFIX ConvertByteToSparceUint8(uint8_t *src, uint16_t* dst, int bytes, // Table must be (ON_VAL_COLOR : OFF_VAL_COLOR)[256]. inline scrntype_vec8_t ConvertByteToPackedPixel_PixelTbl(uint8_t src, _bit_trans_table_scrn_t *tbl) { - scrntype_vec8_t tmpdd; + __DECL_ALIGNED(32) scrntype_vec8_t tmpdd; _bit_trans_table_scrn_t* vt = (_bit_trans_table_scrn_t*)__builtin_assume_aligned(tbl, sizeof(uint16_vec8_t)); tmpdd.v = vt->plane_table[src].v; @@ -1114,8 +1114,8 @@ inline scrntype_vec8_t ConvertByteToPackedPixel_PixelTbl(uint8_t src, _bit_trans // Table must be (ON_VAL_COLOR : OFF_VAL_COLOR)[256]. inline scrntype_vec16_t ConvertByteToDoublePackedPixel_PixelTbl(uint8_t src, _bit_trans_table_scrn_t *tbl) { - scrntype_vec16_t tmpdd; - scrntype_vec8_t tmpd; + __DECL_ALIGNED(32) scrntype_vec16_t tmpdd; + __DECL_ALIGNED(32) scrntype_vec8_t tmpd; _bit_trans_table_scrn_t* vt = (_bit_trans_table_scrn_t*)__builtin_assume_aligned(tbl, sizeof(uint16_vec8_t)); tmpd.v = vt->plane_table[src].v; int j = 0; @@ -1131,7 +1131,7 @@ __DECL_VECTORIZED_LOOP // Table must be initialize ON_COLOR : OFF_COLOR inline void ConvertByteToDoubleMonochromeUint8(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl) { - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t)); __DECL_ALIGNED(16) uint8_t d[16]; @@ -1151,7 +1151,7 @@ __DECL_VECTORIZED_LOOP inline void ConvertByteToMonochromeUint8(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl) { - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), 
sizeof(uint16_vec8_t)); tmpd = vt[src]; @@ -1163,7 +1163,7 @@ __DECL_VECTORIZED_LOOP inline void ConvertRGBTo8ColorsUint8(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift) { - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t)); uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t)); uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t)); @@ -1180,7 +1180,7 @@ __DECL_VECTORIZED_LOOP inline void ConvertRGBTo8ColorsUint8_Zoom2Left(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift) { - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t)); uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t)); uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t)); @@ -1198,7 +1198,7 @@ __DECL_VECTORIZED_LOOP inline void ConvertRGBTo8ColorsUint8_Zoom2Right(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift) { - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t)); uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t)); uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t)); @@ -1216,7 +1216,7 @@ __DECL_VECTORIZED_LOOP inline void 
ConvertRGBTo8ColorsUint8_Zoom2Double(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift) { - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t)); uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t)); uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t)); @@ -1234,7 +1234,7 @@ __DECL_VECTORIZED_LOOP inline void ConvertByteToMonochromeUint8Cond_Zoom2(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl, uint8_t on_color, uint8_t off_color) { - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t)); __DECL_ALIGNED(16) uint8_t d[16]; @@ -1254,7 +1254,7 @@ __DECL_VECTORIZED_LOOP inline void ConvertByteToMonochromeUint8Cond(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl, uint8_t on_color, uint8_t off_color) { - uint16_vec8_t tmpd; + __DECL_ALIGNED(16) uint16_vec8_t tmpd; uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t)); tmpd = vt[src]; diff --git a/source/src/vm/fm7/display.cpp b/source/src/vm/fm7/display.cpp index 67fb75207..f2b36c13d 100644 --- a/source/src/vm/fm7/display.cpp +++ b/source/src/vm/fm7/display.cpp @@ -42,31 +42,6 @@ DISPLAY::DISPLAY(VM_TEMPLATE* parent_vm, EMU* parent_emu) : DEVICE(parent_vm, pa mainio = NULL; subcpu = NULL; keyboard = NULL; -#if 1 - PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_0[0][0])), 0x0080, 0x0000); - PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_1[0][0])), 0x0040, 0x0000); - PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_2[0][0])), 0x0020, 0x0000); - 
PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_3[0][0])), 0x0010, 0x0000); -#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX) - PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_4[0][0])), 0x0008, 0x0000); - PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_5[0][0])), 0x0004, 0x0000); -#endif -#else - for(int i = 0; i < 256; i++) { - uint16_t n = (uint16_t)i; - for(int j = 0; j < 8; j++) { - bit_trans_table_0[i][j] = n & 0x80; - bit_trans_table_1[i][j] = ((n & 0x80) != 0) ? 0x40 : 0; - bit_trans_table_2[i][j] = ((n & 0x80) != 0) ? 0x20 : 0; - bit_trans_table_3[i][j] = ((n & 0x80) != 0) ? 0x10 : 0; -#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX) - bit_trans_table_4[i][j] = ((n & 0x80) != 0) ? 0x08 : 0; - bit_trans_table_5[i][j] = ((n & 0x80) != 0) ? 0x04 : 0; -#endif - n <<= 1; - } - } -#endif displine = 0; active_page = 0; #if defined(USE_GREEN_DISPLAY) @@ -3365,6 +3340,31 @@ void DISPLAY::initialize() { int i; +#if 1 + PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_0[0][0])), 0x0080, 0x0000); + PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_1[0][0])), 0x0040, 0x0000); + PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_2[0][0])), 0x0020, 0x0000); + PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_3[0][0])), 0x0010, 0x0000); +#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX) + PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_4[0][0])), 0x0008, 0x0000); + PrepareBitTransTableUint16((_bit_trans_table_t*)(&(bit_trans_table_5[0][0])), 0x0004, 0x0000); +#endif +#else + for(int i = 0; i < 256; i++) { + uint16_t n = (uint16_t)i; + for(int j = 0; j < 8; j++) { + bit_trans_table_0[i][j] = n & 0x80; + bit_trans_table_1[i][j] = ((n & 0x80) != 0) ? 0x40 : 0; + bit_trans_table_2[i][j] = ((n & 0x80) != 0) ? 0x20 : 0; + bit_trans_table_3[i][j] = ((n & 0x80) != 0) ? 
0x10 : 0; +#if defined(_FM77AV40) || defined(_FM77AV40EX) || defined(_FM77AV40SX) + bit_trans_table_4[i][j] = ((n & 0x80) != 0) ? 0x08 : 0; + bit_trans_table_5[i][j] = ((n & 0x80) != 0) ? 0x04 : 0; +#endif + n <<= 1; + } + } +#endif memset(io_w_latch, 0xff, sizeof(io_w_latch)); screen_update_flag = true; memset(gvram, 0x00, sizeof(gvram)); diff --git a/source/src/vm/fm7/vram.cpp b/source/src/vm/fm7/vram.cpp index 9b804e19f..95e98ccb7 100644 --- a/source/src/vm/fm7/vram.cpp +++ b/source/src/vm/fm7/vram.cpp @@ -671,8 +671,8 @@ void DISPLAY::CopyDrawnData(scrntype_t* src, scrntype_t* dst, int width, bool sc #endif scrntype_vec8_t* vsrc = (scrntype_vec8_t*)__builtin_assume_aligned(src, sizeof(scrntype_vec8_t)); scrntype_vec8_t* vdst = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t)); - scrntype_vec8_t tmp_dd; - scrntype_vec8_t sline; + __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; + __DECL_ALIGNED(32) scrntype_vec8_t sline; if(scan_line) { __DECL_VECTORIZED_LOOP @@ -747,7 +747,7 @@ void DISPLAY::GETVRAM_1_400L(int yoff, scrntype_t *p) pixel = gvram_shadow[yoff_d]; uint16_vec8_t *ppx = (uint16_vec8_t *)__builtin_assume_aligned(&(bit_trans_table_0[pixel][0]), 16); __DECL_ALIGNED(16) uint16_vec8_t tmp_d; - scrntype_vec8_t tmp_dd; + __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; scrntype_vec8_t *vp = (scrntype_vec8_t *)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t)); tmp_d.v = ppx->v; @@ -770,7 +770,7 @@ void DISPLAY::GETVRAM_1_400L_GREEN(int yoff, scrntype_t *p) pixel = gvram_shadow[yoff_d]; uint16_vec8_t *ppx = (uint16_vec8_t *)__builtin_assume_aligned(&(bit_trans_table_0[pixel][0]), 16); __DECL_ALIGNED(16) uint16_vec8_t tmp_d; - scrntype_vec8_t tmp_dd; + __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; scrntype_vec8_t *vp = (scrntype_vec8_t *)__builtin_assume_aligned(p, sizeof(scrntype_vec8_t)); tmp_d.v = ppx->v; @@ -794,7 +794,7 @@ void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px, { uint32_t b3, r3, g3; uint8_t bb[4], rr[4], gg[4]; - 
uint16_vec8_t pixels; + __DECL_ALIGNED(16) uint16_vec8_t pixels; __DECL_ALIGNED(16) const uint16_t __masks[8] = {(uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask, (uint16_t)mask}; scrntype_t b, r, g; uint32_t idx;; @@ -841,7 +841,7 @@ void DISPLAY::GETVRAM_4096(int yoff, scrntype_t *p, scrntype_t *px, #else __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[16]; #endif - uint16_vec8_t tmp_g, tmp_r, tmp_b; + __DECL_ALIGNED(16) uint16_vec8_t tmp_g, tmp_r, tmp_b; __v8hi *vp0, *vp1, *vp2, *vp3; // G vp0 = (__v8hi*)__builtin_assume_aligned(&(bit_trans_table_0[gg[0]][0]), 16); @@ -895,7 +895,7 @@ __DECL_VECTORIZED_LOOP tmp_dd[i * 2] = tmp_dd[i * 2 + 1] = analog_palette_pixel[pixels.w[i]];; } scrntype_vec8_t *vpx = (scrntype_vec8_t*)__builtin_assume_aligned(px, sizeof(scrntype_vec8_t)); - scrntype_vec8_t vmask; + __DECL_ALIGNED(32) scrntype_vec8_t vmask; __DECL_VECTORIZED_LOOP for(int i = 0; i < 2; i++) { vp[i].v = dp[i].v; @@ -957,9 +957,9 @@ void DISPLAY::GETVRAM_256k(int yoff, scrntype_t *p, scrntype_t *px, bool scan_li uint8_t bb[8], rr[8], gg[8]; - uint16_vec8_t _btmp; - uint16_vec8_t _rtmp; - uint16_vec8_t _gtmp; + __DECL_ALIGNED(16) uint16_vec8_t _btmp; + __DECL_ALIGNED(16) uint16_vec8_t _rtmp; + __DECL_ALIGNED(16) uint16_vec8_t _gtmp; uint16_vec8_t *vp0, *vp1, *vp2, *vp3, *vp4, *vp5; #if !defined(FIXED_FRAMEBUFFER_SIZE) __DECL_ALIGNED(sizeof(scrntype_t) * 8) scrntype_t tmp_dd[8]; @@ -1088,7 +1088,7 @@ __DECL_VECTORIZED_LOOP dp[i].v = dp[i].v >> 2; #endif } - scrntype_vec8_t scanline_data; + __DECL_ALIGNED(32) scrntype_vec8_t scanline_data; __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { scanline_data.w[i] = RGBA_COLOR(31, 31, 31, 255); diff --git a/source/src/vm/z80tvgame/memory.cpp b/source/src/vm/z80tvgame/memory.cpp index e23d257e1..62054f1b1 100644 --- a/source/src/vm/z80tvgame/memory.cpp +++ b/source/src/vm/z80tvgame/memory.cpp @@ -99,7 +99,7 @@ void MEMORY::draw_screen() dest[x] = (val & 
bit) ? col_w : col_b; } #else - scrntype_vec8_t d; + __DECL_ALIGNED(32) scrntype_vec8_t d; for(int xx = 32; xx < (240 - 32); xx += 8) { uint8_t val = ram[offset + (xx >> 3)]; d = ConvertByteToPackedPixel_PixelTbl(val, &pixel_trans_table); diff --git a/source/src/vm/z80tvgame/memory.h b/source/src/vm/z80tvgame/memory.h index 5d00c149a..1947346cf 100644 --- a/source/src/vm/z80tvgame/memory.h +++ b/source/src/vm/z80tvgame/memory.h @@ -31,7 +31,7 @@ private: bool inserted; - _bit_trans_table_scrn_t pixel_trans_table; + __DECL_ALIGNED(32) _bit_trans_table_scrn_t pixel_trans_table; public: MEMORY(VM_TEMPLATE* parent_vm, EMU* parent_emu) : DEVICE(parent_vm, parent_emu) {