From: K.Ohta Date: Mon, 5 Nov 2018 12:55:58 +0000 (+0900) Subject: [COMMON] Add some renderer to common.[cpp|h]. X-Git-Url: http://git.osdn.net/view?p=csp-qt%2Fcommon_source_project-fm7.git;a=commitdiff_plain;h=9f622184903eb3a6656f0a906a86ba985a3193ec [COMMON] Add some renderer to common.[cpp|h]. --- diff --git a/source/src/common.cpp b/source/src/common.cpp index a85c9221d..76391bf4d 100644 --- a/source/src/common.cpp +++ b/source/src/common.cpp @@ -809,6 +809,7 @@ void DLL_PREFIX Render8Colors_Line(_render_command_data_t *src, scrntype_t *dst, uint32_t offset[4] __attribute__((aligned(16))) = {0}; uint32_t beginaddr[4] __attribute__((aligned(16))) = {0}; uint32_t mask = src->addrmask; + uint32_t offsetmask = src->addrmask2; __DECL_VECTORIZED_LOOP for(int i = 0; i < 3; i++) { offset[i] = src->voffset[i]; @@ -816,7 +817,9 @@ __DECL_VECTORIZED_LOOP if(palette == NULL) { __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { - dummy_palette[i] = RGB_COLOR(i & 2, i & 4, i & 1); + dummy_palette[i] = RGB_COLOR(((i & 2) << 5) | 0x1f, + ((i & 4) << 5) | 0x1f, + ((i & 1) << 5) | 0x1f); } palette = dummy_palette; } @@ -832,30 +835,23 @@ __DECL_VECTORIZED_LOOP scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t)); x = src->begin_pos; - offset[0] = offset[0] + x; - offset[1] = offset[1] + x; - offset[2] = offset[2] + x; - if(dst2 == NULL) { + uint32_t n = x; + if(dst2 == NULL) { __DECL_VECTORIZED_LOOP for(uint32_t xx = 0; xx < src->render_width; xx++) { - b = (is_render[0]) ? bp[offset[0] & mask] : 0; - r = (is_render[1]) ? rp[offset[1] & mask] : 0; - g = (is_render[2]) ? gp[offset[2] & mask] : 0; + b = (is_render[0]) ? bp[(offset[0] + n) & mask] : 0; + r = (is_render[1]) ? rp[(offset[1] + n) & mask] : 0; + g = (is_render[2]) ? gp[(offset[2] + n) & mask] : 0; tmpd.v = vpb[b].v; tmpd.v = tmpd.v | vpr[r].v; tmpd.v = tmpd.v | vpg[g].v; -// if(shift != 0) { - tmpd.v = tmpd.v >> shift; -// } + tmpd.v = tmpd.v >> shift; + n = (n + 1) & offsetmask; __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { tmp_dd.w[i] = palette[tmpd.w[i]]; } vdp[xx].v = tmp_dd.v; - __DECL_VECTORIZED_LOOP - for(int i = 0; i < 4; i++) { - offset[i]++; - } } } else { #if defined(_RGB555) || defined(_RGBA565) @@ -865,20 +861,20 @@ __DECL_VECTORIZED_LOOP #endif scrntype_vec8_t sline __attribute__((aligned(sizeof(scrntype_vec8_t)))); scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t)); + __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { sline.w[i] = (scrntype_t)RGBA_COLOR(31, 31, 31, 255); } __DECL_VECTORIZED_LOOP for(uint32_t xx = 0; xx < src->render_width; xx++) { - b = (is_render[0]) ? bp[offset[0] & mask] : 0; - r = (is_render[1]) ? rp[offset[1] & mask] : 0; - g = (is_render[2]) ? gp[offset[2] & mask] : 0; + b = (is_render[0]) ? bp[(offset[0] + n) & mask] : 0; + r = (is_render[1]) ? rp[(offset[1] + n) & mask] : 0; + g = (is_render[2]) ? gp[(offset[2] + n) & mask] : 0; tmpd.v = vpb[b].v; tmpd.v = tmpd.v | vpr[r].v; tmpd.v = tmpd.v | vpg[g].v; -// if(shift != 0) { - tmpd.v = tmpd.v >> shift; -// } + tmpd.v = tmpd.v >> shift; + n = (n + 1) & offsetmask; __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { tmp_dd.w[i] = palette[tmpd.w[i]]; @@ -889,10 +885,6 @@ __DECL_VECTORIZED_LOOP tmp_dd.v = tmp_dd.v & sline.v; } vdp2[xx].v = tmp_dd.v; - __DECL_VECTORIZED_LOOP - for(int i = 0; i < 4; i++) { - offset[i]++; - } } } } @@ -907,7 +899,7 @@ void DLL_PREFIX Render16Colors_Line(_render_command_data_t *src, scrntype_t *dst // if(src->bit_trans_table[i] == NULL) return; // if(src->data[i] == NULL) return; // } - scrntype_t dummy_palette[8]; // fallback + scrntype_t dummy_palette[16]; // fallback scrntype_t *palette = src->palette; uint16_vec8_t *vpb = (uint16_vec8_t*)__builtin_assume_aligned(src->bit_trans_table[0], sizeof(uint16_vec8_t)); @@ -919,14 +911,18 @@ void DLL_PREFIX Render16Colors_Line(_render_command_data_t *src, scrntype_t *dst uint32_t offset[4] __attribute__((aligned(16))); uint32_t beginaddr[4] __attribute__((aligned(16))); uint32_t mask = src->addrmask; + uint32_t offsetmask = src->addrmask2; + __DECL_VECTORIZED_LOOP for(int i = 0; i < 4; i++) { offset[i] = src->voffset[i]; } if(palette == NULL) { __DECL_VECTORIZED_LOOP - for(int i = 0; i < 8; i++) { - dummy_palette[i] = RGB_COLOR(i & 2, i & 4, i & 1); + for(int i = 0; i < 16; i++) { + dummy_palette[i] = RGB_COLOR((((i & 2) + (i & 8)) << 4) | 0x0f, + (((i & 4) + (i & 8)) << 4) | 0x0f, + (((i & 1) + (i & 8)) << 4) | 0x0f); } palette = dummy_palette; } @@ -943,33 +939,25 @@ __DECL_VECTORIZED_LOOP scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t)); x = src->begin_pos; - offset[0] = offset[0] + x; - offset[1] = offset[1] + x; - offset[2] = offset[2] + x; - offset[3] = offset[3] + x; + uint32_t xn = x; if(dst2 == NULL) { __DECL_VECTORIZED_LOOP for(uint32_t xx = 0; xx < src->render_width; xx++) { - b = (is_render[0]) ? bp[offset[0] & mask] : 0; - r = (is_render[1]) ? rp[offset[1] & mask] : 0; - g = (is_render[2]) ? gp[offset[2] & mask] : 0; - n = (is_render[3]) ? np[offset[3] & mask] : 0; + b = (is_render[0]) ? bp[(offset[0] + xn) & mask] : 0; + r = (is_render[1]) ? rp[(offset[1] + xn) & mask] : 0; + g = (is_render[2]) ? gp[(offset[2] + xn) & mask] : 0; + n = (is_render[3]) ? np[(offset[3] + xn) & mask] : 0; tmpd.v = vpb[b].v; tmpd.v = tmpd.v | vpr[r].v; tmpd.v = tmpd.v | vpg[g].v; tmpd.v = tmpd.v | vpn[n].v; -// if(shift != 0) { - tmpd.v = tmpd.v >> shift; -// } + tmpd.v = tmpd.v >> shift; + xn = (xn + 1) & offsetmask; __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { tmp_dd.w[i] = palette[tmpd.w[i]]; } vdp[xx].v = tmp_dd.v; - __DECL_VECTORIZED_LOOP - for(int i = 0; i < 4; i++) { - offset[i]++; - } } } else { #if defined(_RGB555) || defined(_RGBA565) @@ -979,22 +967,22 @@ __DECL_VECTORIZED_LOOP #endif scrntype_vec8_t sline __attribute__((aligned(sizeof(scrntype_vec8_t)))); scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t)); + __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { sline.w[i] = (scrntype_t)RGBA_COLOR(31, 31, 31, 255); } __DECL_VECTORIZED_LOOP for(uint32_t xx = 0; xx < src->render_width; xx++) { - b = (is_render[0]) ? bp[offset[0] & mask] : 0; - r = (is_render[1]) ? rp[offset[1] & mask] : 0; - g = (is_render[2]) ? gp[offset[2] & mask] : 0; - n = (is_render[3]) ? np[offset[3] & mask] : 0; + b = (is_render[0]) ? bp[(offset[0] + xn) & mask] : 0; + r = (is_render[1]) ? rp[(offset[1] + xn) & mask] : 0; + g = (is_render[2]) ? gp[(offset[2] + xn) & mask] : 0; + n = (is_render[3]) ? np[(offset[3] + xn) & mask] : 0; tmpd.v = vpb[b].v; tmpd.v = tmpd.v | vpr[r].v; tmpd.v = tmpd.v | vpg[g].v; tmpd.v = tmpd.v | vpn[n].v; -// if(shift != 0) { - tmpd.v = tmpd.v >> shift; -// } + tmpd.v = tmpd.v >> shift; + xn = (xn + 1) & offsetmask; __DECL_VECTORIZED_LOOP for(int i = 0; i < 8; i++) { tmp_dd.w[i] = palette[tmpd.w[i]]; @@ -1005,14 +993,257 @@ __DECL_VECTORIZED_LOOP tmp_dd.v = tmp_dd.v & sline.v; } vdp2[xx].v = tmp_dd.v; + } + } +} + +// src->palette Must be 2^planes entries. +void DLL_PREFIX Render2NColors_Line(_render_command_data_t *src, scrntype_t *dst, scrntype_t* dst2, bool scan_line, int planes) +{ + if(src == NULL) return; + if(dst == NULL) return; + if(src->palette == NULL) return; + if(planes <= 0) return; + if(planes >= 16) planes = 16; +//__DECL_VECTORIZED_LOOP +// for(int i = 0; i < 3; i++) { +// if(src->bit_trans_table[i] == NULL) return; +// if(src->data[i] == NULL) return; +// } + scrntype_t *palette = src->palette; + + + uint16_vec8_t* vp[16]; + for(int i = 0; i < planes; i++) { + vp[i] = (uint16_vec8_t*)__builtin_assume_aligned(src->bit_trans_table[i], sizeof(uint16_vec8_t)); + } + + uint32_t x; + uint32_t offset[16] __attribute__((aligned(16))); + uint32_t beginaddr[16] __attribute__((aligned(16))); + uint32_t mask = src->addrmask; + uint32_t offsetmask = src->addrmask2; +__DECL_VECTORIZED_LOOP + for(int i = 0; i < planes; i++) { + offset[i] = src->voffset[i]; + } + uint8_t *pp[16]; + for(int i = 0; i < planes; i++) { + pp[i] = &(src->data[i][src->baseaddress[i]]); + } + + uint8_t d[16]; + int shift = src->shift; + const bool is_render[4] = { src->is_render[0], src->is_render[1], src->is_render[2], src->is_render[3] }; + uint16_vec8_t tmpd __attribute__((aligned(sizeof(uint16_vec8_t)))); + scrntype_vec8_t tmp_dd; + scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t)); + + x = src->begin_pos; + if(dst2 == NULL) { + uint32_t n = x; + __DECL_VECTORIZED_LOOP + for(uint32_t xx = 0; xx < src->render_width; xx++) { + d[0] = (is_render[0]) ? pp[0][(offset[0] + n) & mask] : 0; + tmpd.v = vp[0][d[0]].v; __DECL_VECTORIZED_LOOP - for(int i = 0; i < 4; i++) { - offset[i]++; + for(int i = 1; i < planes; i++) { + d[i] = (is_render[i]) ? pp[i][(offset[i] + n) & mask] : 0; + tmpd.v = tmpd.v | vp[i][d[i]].v; } + n = (n + 1) & offsetmask; + tmpd.v = tmpd.v >> shift; + __DECL_VECTORIZED_LOOP + for(int i = 0; i < 8; i++) { + tmp_dd.w[i] = palette[tmpd.w[i]]; + } + vdp[xx].v = tmp_dd.v; + } + } else { +#if defined(_RGB555) || defined(_RGBA565) + static const int shift_factor = 2; +#else // 24bit + static const int shift_factor = 3; +#endif + scrntype_vec8_t sline __attribute__((aligned(sizeof(scrntype_vec8_t)))); + scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t)); + __DECL_VECTORIZED_LOOP + for(int i = 0; i < 8; i++) { + sline.w[i] = (scrntype_t)RGBA_COLOR(31, 31, 31, 255); + } + uint32_t n = x; + __DECL_VECTORIZED_LOOP + for(uint32_t xx = 0; xx < src->render_width; xx++) { + d[0] = (is_render[0]) ? pp[0][(offset[0] + n) & mask] : 0; + tmpd.v = vp[0][d[0]].v; + __DECL_VECTORIZED_LOOP + for(int i = 1; i < planes; i++) { + d[i] = (is_render[i]) ? pp[i][(offset[i] + n) & mask] : 0; + tmpd.v = tmpd.v | vp[i][d[i]].v; + } + n = (n + 1) & offsetmask; + tmpd.v = tmpd.v >> shift; + __DECL_VECTORIZED_LOOP + for(int i = 0; i < 8; i++) { + tmp_dd.w[i] = palette[tmpd.w[i]]; + } + vdp[xx].v = tmp_dd.v; + if(scan_line) { + tmp_dd.v = tmp_dd.v >> shift_factor; + tmp_dd.v = tmp_dd.v & sline.v; + } + vdp2[xx].v = tmp_dd.v; } } } +void DLL_PREFIX Convert2NColorsToByte_Line(_render_command_data_t *src, uint8_t *dst, int planes) +{ + if(planes >= 8) planes = 8; + if(planes <= 0) return; + + uint8_t* srcp[8]; + uint32_t offset[8] __attribute__((aligned(16))) = {0}; + uint16_vec8_t dat __attribute__((aligned(sizeof(uint16_vec8_t)))); + uint16_vec8_t* bp[8] ; + +__DECL_VECTORIZED_LOOP + for(int i = 0; i < planes; i++) { + bp[i] = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[i]->plane_table[0]), sizeof(uint16_vec8_t)); + srcp[i] = &(src->data[i][src->baseaddress[i]]); + } + uint32_t addrmask = src->addrmask; + uint32_t offsetmask = src->addrmask2; + int shift = src->shift; + +__DECL_VECTORIZED_LOOP + for(int i = 0; i < planes; i++) { + offset[i] = src->voffset[i]; + } + + uint32_t noffset = src->begin_pos & offsetmask; + uint8_t td[16]; +__DECL_VECTORIZED_LOOP + for(int x = 0; x < src->render_width; x++) { +__DECL_VECTORIZED_LOOP + for(int i = 0; i < planes; i++) { + td[i] = srcp[(noffset + offset[i]) & addrmask]; + } + noffset = (noffset + 1) & offsetmask; + dat.v = bp[0][td[0]].v; +__DECL_VECTORIZED_LOOP + for(int i = 1; i < planes; i++) { + dat.v = dat.v | bp[i][td[i]].v; + } + dat.v = dat.v >> shift; +__DECL_VECTORIZED_LOOP + for(int i = 0; i < 8; i++) { + dst[i] = (uint8_t)(dat.w[i]); + } + dst += 8; + + } +} + +void DLL_PREFIX Convert2NColorsToByte_LineZoom2(_render_command_data_t *src, uint8_t *dst, int planes) +{ + if(planes >= 8) planes = 8; + if(planes <= 0) return; + + uint8_t* srcp[8]; + uint32_t offset[8] __attribute__((aligned(16))) = {0}; + uint16_vec8_t dat __attribute__((aligned(sizeof(uint16_vec8_t)))); + uint16_vec8_t* bp[8] ; + +__DECL_VECTORIZED_LOOP + for(int i = 0; i < planes; i++) { + bp[i] = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[i]->plane_table[0]), sizeof(uint16_vec8_t)); + srcp[i] = &(src->data[i][src->baseaddress[i]]); + } + uint32_t addrmask = src->addrmask; + uint32_t offsetmask = src->addrmask2; + int shift = src->shift; + +__DECL_VECTORIZED_LOOP + for(int i = 0; i < planes; i++) { + offset[i] = src->voffset[i]; + } + + uint32_t noffset = src->begin_pos & offsetmask; + uint8_t td[16]; +__DECL_VECTORIZED_LOOP + for(int x = 0; x < src->render_width; x++) { +__DECL_VECTORIZED_LOOP + for(int i = 0; i < planes; i++) { + td[i] = srcp[(noffset + offset[i]) & addrmask]; + } + noffset = (noffset + 1) & offsetmask; + dat.v = bp[0][td[0]].v; +__DECL_VECTORIZED_LOOP + for(int i = 1; i < planes; i++) { + dat.v = dat.v | bp[i][td[i]].v; + } + dat.v = dat.v >> shift; +__DECL_VECTORIZED_LOOP + for(int i = 0, j = 0; i < 16; i +=2, j++) { + dst[i] = (uint8_t)(dat.w[j]); + dst[i + 1] = (uint8_t)(dat.w[j]); + } + dst += 16; + } +} + +void DLL_PREFIX Convert8ColorsToByte_Line(_render_command_data_t *src, uint8_t *dst) +{ + uint8_t *bp = &(src->data[0][src->baseaddress[0]]); + uint8_t *rp = &(src->data[1][src->baseaddress[1]]); + uint8_t *gp = &(src->data[2][src->baseaddress[2]]); + uint32_t offset[4] __attribute__((aligned(16))) = {0}; + + uint16_vec8_t rdat __attribute__((aligned(sizeof(uint16_vec8_t)))); + uint16_vec8_t gdat __attribute__((aligned(sizeof(uint16_vec8_t)))); + uint16_vec8_t bdat __attribute__((aligned(sizeof(uint16_vec8_t)))); + uint16_vec8_t tmpd __attribute__((aligned(sizeof(uint16_vec8_t)))); + + uint16_vec8_t* bpb = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[0]->plane_table[0]), sizeof(uint16_vec8_t)); + uint16_vec8_t* bpr = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[1]->plane_table[0]), sizeof(uint16_vec8_t)); + uint16_vec8_t* bpg = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[2]->plane_table[0]), sizeof(uint16_vec8_t)); + + uint32_t addrmask = src->addrmask; + uint32_t offsetmask = src->addrmask2; + int shift = src->shift; + +__DECL_VECTORIZED_LOOP + for(int i = 0; i < 3; i++) { + offset[i] = src->voffset[i]; + } + + uint32_t noffset = src->begin_pos & offsetmask; + uint8_t b, r, g; +__DECL_VECTORIZED_LOOP + for(int x = 0; x < src->render_width; x++) { + b = bp[(noffset + offset[0]) & addrmask]; + r = rp[(noffset + offset[1]) & addrmask]; + g = gp[(noffset + offset[2]) & addrmask]; + + noffset = (noffset + 1) & offsetmask; + + bdat.v = bpb[b].v; + rdat.v = bpr[r].v; + gdat.v = bpg[g].v; + tmpd.v = bdat.v; + tmpd.v = tmpd.v | rdat.v; + tmpd.v = tmpd.v | gdat.v; + tmpd.v = tmpd.v >> shift; + +__DECL_VECTORIZED_LOOP + for(int i = 0; i < 8; i++) { + dst[i] = (uint8_t)(tmpd.w[i]); + } + dst += 8; + } +} + #ifndef _MSC_VER struct to_upper { // Refer from documentation of libstdc++, GCC5. diff --git a/source/src/common.h b/source/src/common.h index 6129aa4ee..1ecfd65c5 100644 --- a/source/src/common.h +++ b/source/src/common.h @@ -1057,7 +1057,8 @@ typedef struct { uint8_t* data[16]; uint32_t baseaddress[16]; uint32_t voffset[16]; - uint32_t addrmask; + uint32_t addrmask; // For global increment. + uint32_t addrmask2; // For local increment. uint32_t begin_pos; uint32_t render_width; } _render_command_data_t; @@ -1144,7 +1145,78 @@ __DECL_VECTORIZED_LOOP } } -inline void ConvertByteToDoubleMonochromeUint8Cond(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl, uint8_t on_color, uint8_t off_color) +inline void ConvertRGBTo8ColorsUint8(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift) +{ + uint16_vec8_t tmpd __attribute__((aligned(sizeof(uint16_vec8_t)))); + uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t)); + uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t)); + uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t)); + + tmpd.v = rvt[r].v; + tmpd.v = tmpd.v | gvt[g].v; + tmpd.v = tmpd.v | bvt[b].v; + tmpd.v = tmpd.v >> shift; +__DECL_VECTORIZED_LOOP + for(int i = 0; i < 8; i++) { + dst[i] = (uint8_t)(tmpd.w[i]); + } +} + +inline void ConvertRGBTo8ColorsUint8_Zoom2Left(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift) +{ + uint16_vec8_t tmpd __attribute__((aligned(sizeof(uint16_vec8_t)))); + uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t)); + uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t)); + uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t)); + + tmpd.v = rvt[r].v; + tmpd.v = tmpd.v | gvt[g].v; + tmpd.v = tmpd.v | bvt[b].v; + tmpd.v = tmpd.v >> shift; +__DECL_VECTORIZED_LOOP + for(int i = 0, j = 0; i < 8; i += 2, j++) { + dst[i] = (uint8_t)(tmpd.w[j]); + dst[i + 1] = (uint8_t)(tmpd.w[j]); + } +} + +inline void ConvertRGBTo8ColorsUint8_Zoom2Right(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift) +{ + uint16_vec8_t tmpd __attribute__((aligned(sizeof(uint16_vec8_t)))); + uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t)); + uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t)); + uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t)); + + tmpd.v = rvt[r].v; + tmpd.v = tmpd.v | gvt[g].v; + tmpd.v = tmpd.v | bvt[b].v; + tmpd.v = tmpd.v >> shift; +__DECL_VECTORIZED_LOOP + for(int i = 0, j = 4; i < 8; i += 2, j++) { + dst[i] = (uint8_t)(tmpd.w[j]); + dst[i + 1] = (uint8_t)(tmpd.w[j]); + } +} + +inline void ConvertRGBTo8ColorsUint8_Zoom2Double(uint8_t r, uint8_t g, uint8_t b, uint8_t* dst, _bit_trans_table_t* rtbl, _bit_trans_table_t* gtbl, _bit_trans_table_t* btbl, int shift) +{ + uint16_vec8_t tmpd __attribute__((aligned(sizeof(uint16_vec8_t)))); + uint16_vec8_t* rvt = (uint16_vec8_t*)__builtin_assume_aligned(&(rtbl->plane_table[0]), sizeof(uint16_vec8_t)); + uint16_vec8_t* gvt = (uint16_vec8_t*)__builtin_assume_aligned(&(gtbl->plane_table[0]), sizeof(uint16_vec8_t)); + uint16_vec8_t* bvt = (uint16_vec8_t*)__builtin_assume_aligned(&(btbl->plane_table[0]), sizeof(uint16_vec8_t)); + + tmpd.v = rvt[r].v; + tmpd.v = tmpd.v | gvt[g].v; + tmpd.v = tmpd.v | bvt[b].v; + tmpd.v = tmpd.v >> shift; +__DECL_VECTORIZED_LOOP + for(int i = 0, j = 0; i < 16; i += 2, j++) { + dst[i] = (uint8_t)(tmpd.w[j]); + dst[i + 1] = (uint8_t)(tmpd.w[j]); + } +} + +inline void ConvertByteToMonochromeUint8Cond_Zoom2(uint8_t src, uint8_t* dst, _bit_trans_table_t* tbl, uint8_t on_color, uint8_t off_color) { uint16_vec8_t tmpd __attribute__((aligned(sizeof(uint16_vec8_t)))); uint16_vec8_t* vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t)); @@ -1185,6 +1257,11 @@ void DLL_PREFIX PrepareReverseBitTransTableScrnType(_bit_trans_table_scrn_t *tbl void DLL_PREFIX Render8Colors_Line(_render_command_data_t *src, scrntype_t *dst, scrntype_t *dst2, bool scan_line); void DLL_PREFIX Render16Colors_Line(_render_command_data_t *src, scrntype_t *dst, scrntype_t *dst2, bool scan_line); +void DLL_PREFIX Render2NColors_Line(_render_command_data_t *src, scrntype_t *dst, scrntype_t* dst2, bool scan_line, int planes); + +void DLL_PREFIX Convert8ColorsToByte_Line(_render_command_data_t *src, uint8_t *dst); +void DLL_PREFIX Convert2NColorsToByte_Line(_render_command_data_t *src, uint8_t *dst, int planes); +void DLL_PREFIX Convert2NColorsToByte_LineZoom2(_render_command_data_t *src, uint8_t *dst, int planes); inline uint64_t ExchangeEndianU64(uint64_t __in) {