OSDN Git Service

[COMMON] Fix unaligned SIMD variables. Fix crash when built with "-msse2" on Win32.
[csp-qt/common_source_project-fm7.git] / source / src / common.cpp
1 /*
2         Skelton for retropc emulator
3
4         Author : Takeda.Toshiya
5         Date   : 2013.01.17-
6
7         [ common ]
8 */
9
10 #if defined(_USE_QT)
11         #include <string.h>
12         #include <fcntl.h>
13         #if !defined(__WIN32) && !defined(__WIN64)
14                 #include <unistd.h>
15         #else
16                 #include <io.h>
17                 #include <direct.h>
18         #endif
19         #include <sys/types.h>
20         #include <sys/stat.h>
21         #include "csp_logger.h"
22         #include <string>
23         #include <algorithm>
24         #include <cctype>
25         #include <QDir>
26         #include <QFileInfo>
27 #elif defined(_WIN32)
28         #include <shlwapi.h>
29         #pragma comment(lib, "shlwapi.lib")
30 #else
31         #include <time.h>
32 #endif
#include <errno.h>
#include <math.h>
#include "common.h"
#include "fileio.h"
36
37 #if defined(__MINGW32__) || defined(__MINGW64__)
38         extern DWORD GetLongPathName(LPCTSTR lpszShortPath, LPTSTR lpszLongPath, DWORD cchBuffer);
39 #endif
40 #if defined(_USE_QT)
41         std::string DLL_PREFIX cpp_homedir;
42         std::string DLL_PREFIX my_procname;
43         std::string DLL_PREFIX sRssDir;
44 #endif
45
46 void DLL_PREFIX common_initialize()
47 {
48         // get the initial current path when the software starts
49         get_initial_current_path();
50 }
51
// Converts a 32-bit value to little-endian byte order.
// When __LITTLE_ENDIAN__ is defined the value is returned untouched,
// otherwise its four bytes are reversed.
uint32_t DLL_PREFIX EndianToLittle_DWORD(uint32_t x)
{
#if defined(__LITTLE_ENDIAN__)
	return x;
#else
	// byte0 <-> byte3, byte1 <-> byte2
	return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
#endif
}
63
// Converts a 16-bit value to little-endian byte order.
// When __LITTLE_ENDIAN__ is defined the value is returned untouched,
// otherwise its two bytes are exchanged.
uint16_t DLL_PREFIX EndianToLittle_WORD(uint16_t x)
{
#if defined(__LITTLE_ENDIAN__)
	return x;
#else
	// low byte moves up, high byte moves down
	return (uint16_t)(((x & 0x00ff) << 8) | (x >> 8));
#endif
}
74
// Converts a little-endian 32-bit value to the byte order of this build.
// When __LITTLE_ENDIAN__ is defined the value is returned untouched,
// otherwise its four bytes are reversed.
uint32_t DLL_PREFIX EndianFromLittle_DWORD(uint32_t x)
{
#if defined(__LITTLE_ENDIAN__)
	return x;
#else
	// byte0 <-> byte3, byte1 <-> byte2
	return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
#endif
}
86
// Converts a little-endian 16-bit value to the byte order of this build.
// When __LITTLE_ENDIAN__ is defined the value is returned untouched,
// otherwise its two bytes are exchanged.
uint16_t DLL_PREFIX EndianFromLittle_WORD(uint16_t x)
{
#if defined(__LITTLE_ENDIAN__)
	return x;
#else
	// low byte moves up, high byte moves down
	return (uint16_t)(((x & 0x00ff) << 8) | (x >> 8));
#endif
}
97
98
// Converts a 32-bit value to big-endian byte order.
// When __BIG_ENDIAN__ is defined the value is returned untouched,
// otherwise its four bytes are reversed.
uint32_t DLL_PREFIX EndianToBig_DWORD(uint32_t x)
{
#if defined(__BIG_ENDIAN__)
	return x;
#else
	// byte0 <-> byte3, byte1 <-> byte2
	return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
#endif
}
110
// Converts a 16-bit value to big-endian byte order.
// When __BIG_ENDIAN__ is defined the value is returned untouched,
// otherwise its two bytes are exchanged.
uint16_t DLL_PREFIX EndianToBig_WORD(uint16_t x)
{
#if defined(__BIG_ENDIAN__)
	return x;
#else
	// low byte moves up, high byte moves down
	return (uint16_t)(((x & 0x00ff) << 8) | (x >> 8));
#endif
}
121
// Converts a big-endian 32-bit value to the byte order of this build.
// When __BIG_ENDIAN__ is defined the value is returned untouched,
// otherwise its four bytes are reversed.
uint32_t DLL_PREFIX EndianFromBig_DWORD(uint32_t x)
{
#if defined(__BIG_ENDIAN__)
	return x;
#else
	// byte0 <-> byte3, byte1 <-> byte2
	return (x << 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | (x >> 24);
#endif
}
133
// Converts a big-endian 16-bit value to the byte order of this build.
// When __BIG_ENDIAN__ is defined the value is returned untouched,
// otherwise its two bytes are exchanged.
uint16_t DLL_PREFIX EndianFromBig_WORD(uint16_t x)
{
#if defined(__BIG_ENDIAN__)
	return x;
#else
	// low byte moves up, high byte moves down
	return (uint16_t)(((x & 0x00ff) << 8) | (x >> 8));
#endif
}
144
145
146 #ifndef _MSC_VER
// Returns the larger of two signed integers (b when they are equal).
int DLL_PREFIX max(int a, int b)
{
	return (a > b) ? a : b;
}
155
156
// Returns the larger of an unsigned and a signed integer as an unsigned value.
// A negative b can never win, so a is returned in that case.
unsigned DLL_PREFIX int max(unsigned int a, int b)
{
	if((b >= 0) && ((unsigned int)b >= a)) {
		return (unsigned int)b;
	}
	return a;
}
166
// Returns the larger of a signed and an unsigned integer as an unsigned value.
// A negative a can never win, so b is returned in that case.
unsigned DLL_PREFIX int max(int a, unsigned int b)
{
	if((a >= 0) && ((unsigned int)a > b)) {
		return (unsigned int)a;
	}
	return b;
}
176
// Returns the larger of two unsigned integers (b when they are equal).
unsigned int DLL_PREFIX max(unsigned int a, unsigned int b)
{
	return (a > b) ? a : b;
}
185
// Returns the smaller of two signed integers (b when they are equal).
int DLL_PREFIX min(int a, int b)
{
	return (a < b) ? a : b;
}
194
// Returns the smaller of an unsigned and a signed integer as a signed value.
// b wins whenever it is negative or a does not fit into an int.
int DLL_PREFIX min(unsigned int a, int b)
{
	const bool a_is_smaller = (b >= 0) && !(a > INT_MAX) && ((int)a < b);
	return a_is_smaller ? (int)a : b;
}
206
// Returns the smaller of a signed and an unsigned integer as a signed value.
// a wins whenever it is negative or b does not fit into an int.
int DLL_PREFIX min(int a, unsigned int b)
{
	const bool b_is_smaller_or_equal = (a >= 0) && !(b > INT_MAX) && !(a < (int)b);
	return b_is_smaller_or_equal ? (int)b : a;
}
218
// Returns the smaller of two unsigned integers (b when they are equal).
unsigned int DLL_PREFIX min(unsigned int a, unsigned int b)
{
	return (a < b) ? a : b;
}
227 #endif
228
229 #ifndef SUPPORT_SECURE_FUNCTIONS
230 //errno_t my_tfopen_s(FILE** pFile, const _TCHAR *filename, const _TCHAR *mode)
231 //{
232 //      if((*pFile = _tfopen(filename, mode)) != NULL) {
233 //              return 0;
234 //      } else {
235 //              return errno;
236 //      }
237 //}
238
// Bounded replacement for _tcscat_s(): appends strSource to strDestination.
//
// strDestination  : NUL-terminated string to append to.
// numberOfElements: capacity of strDestination in elements, including the terminator.
// strSource       : NUL-terminated string to append.
//
// Returns 0 on success, EINVAL for a NULL/zero-sized/unterminated destination
// or a NULL source, and ERANGE when the source had to be truncated.
// The result never exceeds numberOfElements and stays NUL-terminated; an
// oversized source is truncated, like the vsnprintf()-based my_sprintf_s().
errno_t DLL_PREFIX my_tcscat_s(_TCHAR *strDestination, size_t numberOfElements, const _TCHAR *strSource)
{
	if(strDestination == NULL || numberOfElements == 0 || strSource == NULL) {
		return EINVAL;
	}
	// Find the current end of the destination without leaving the buffer.
	size_t used = 0;
	while(used < numberOfElements && strDestination[used] != '\0') {
		used++;
	}
	if(used == numberOfElements) {
		// No terminator inside the buffer: nothing can be appended safely.
		return EINVAL;
	}
	// Append while one element remains free for the terminator.
	size_t taken = 0;
	while(strSource[taken] != '\0' && used + 1 < numberOfElements) {
		strDestination[used++] = strSource[taken++];
	}
	strDestination[used] = '\0';
	return (strSource[taken] == '\0') ? 0 : ERANGE;
}
244
// Bounded replacement for strcpy_s(): copies strSource into strDestination.
//
// strDestination  : buffer that receives the copy.
// numberOfElements: capacity of strDestination in chars, including the terminator.
// strSource       : NUL-terminated string to copy.
//
// Returns 0 on success, EINVAL for a NULL/zero-sized destination or a NULL
// source (the destination is emptied in the latter case), and ERANGE when the
// source had to be truncated. The result is always NUL-terminated; an
// oversized source is truncated, like the vsnprintf()-based my_sprintf_s().
errno_t DLL_PREFIX my_strcpy_s(char *strDestination, size_t numberOfElements, const char *strSource)
{
	if(strDestination == NULL || numberOfElements == 0) {
		return EINVAL;
	}
	if(strSource == NULL) {
		strDestination[0] = '\0';
		return EINVAL;
	}
	const size_t length = strlen(strSource);
	const size_t copied = (length < numberOfElements) ? length : (numberOfElements - 1);
	memmove(strDestination, strSource, copied);
	strDestination[copied] = '\0';
	return (copied == length) ? 0 : ERANGE;
}
250
// Bounded replacement for _tcscpy_s(): copies strSource into strDestination.
//
// strDestination  : buffer that receives the copy.
// numberOfElements: capacity of strDestination in elements, including the terminator.
// strSource       : NUL-terminated string to copy.
//
// Returns 0 on success, EINVAL for a NULL/zero-sized destination or a NULL
// source (the destination is emptied in the latter case), and ERANGE when the
// source had to be truncated. The result is always NUL-terminated; an
// oversized source is truncated, like the vsnprintf()-based my_sprintf_s().
errno_t DLL_PREFIX my_tcscpy_s(_TCHAR *strDestination, size_t numberOfElements, const _TCHAR *strSource)
{
	if(strDestination == NULL || numberOfElements == 0) {
		return EINVAL;
	}
	if(strSource == NULL) {
		strDestination[0] = '\0';
		return EINVAL;
	}
	// Copy while one element remains free for the terminator.
	size_t pos = 0;
	while(strSource[pos] != '\0' && pos + 1 < numberOfElements) {
		strDestination[pos] = strSource[pos];
		pos++;
	}
	strDestination[pos] = '\0';
	return (strSource[pos] == '\0') ? 0 : ERANGE;
}
256
// Bounded replacement for strncpy_s(): copies at most count chars of strSource.
//
// strDestination  : buffer that receives the copy.
// numberOfElements: capacity of strDestination in chars, including the terminator.
// strSource       : string to copy from (read up to count chars or its NUL).
// count           : maximum number of chars to take from strSource.
//
// Returns 0 on success, EINVAL for a NULL/zero-sized destination or a NULL
// source (the destination is emptied in the latter case), and ERANGE when the
// requested chars did not fit and were truncated.
// Unlike plain strncpy() the result is always NUL-terminated, never exceeds
// numberOfElements, and is not zero-padded up to count.
errno_t DLL_PREFIX my_strncpy_s(char *strDestination, size_t numberOfElements, const char *strSource, size_t count)
{
	if(strDestination == NULL || numberOfElements == 0) {
		return EINVAL;
	}
	if(strSource == NULL) {
		strDestination[0] = '\0';
		return EINVAL;
	}
	// Number of chars the caller asked for: up to count, stopping at the NUL.
	size_t wanted = 0;
	while(wanted < count && strSource[wanted] != '\0') {
		wanted++;
	}
	const size_t copied = (wanted < numberOfElements) ? wanted : (numberOfElements - 1);
	memmove(strDestination, strSource, copied);
	strDestination[copied] = '\0';
	return (copied == wanted) ? 0 : ERANGE;
}
262
// Bounded replacement for _tcsncpy_s(): copies at most count elements of strSource.
//
// strDestination  : buffer that receives the copy.
// numberOfElements: capacity of strDestination in elements, including the terminator.
// strSource       : string to copy from (read up to count elements or its NUL).
// count           : maximum number of elements to take from strSource.
//
// Returns 0 on success, EINVAL for a NULL/zero-sized destination or a NULL
// source (the destination is emptied in the latter case), and ERANGE when the
// requested elements did not fit and were truncated.
// Unlike plain strncpy() the result is always NUL-terminated, never exceeds
// numberOfElements, and is not zero-padded up to count.
errno_t DLL_PREFIX my_tcsncpy_s(_TCHAR *strDestination, size_t numberOfElements, const _TCHAR *strSource, size_t count)
{
	if(strDestination == NULL || numberOfElements == 0) {
		return EINVAL;
	}
	if(strSource == NULL) {
		strDestination[0] = '\0';
		return EINVAL;
	}
	// Copy while the caller's limit allows it and the terminator still fits.
	size_t pos = 0;
	while(pos < count && strSource[pos] != '\0' && pos + 1 < numberOfElements) {
		strDestination[pos] = strSource[pos];
		pos++;
	}
	strDestination[pos] = '\0';
	// Anything still pending within the caller's limit means truncation.
	const bool truncated = (pos < count) && (strSource[pos] != '\0');
	return truncated ? ERANGE : 0;
}
268
// Replacement for strtok_s(): splits a string into tokens, keeping the scan
// position in *context so that independent tokenisations can be interleaved.
//
// strToken  : string to tokenise on the first call, NULL to continue.
// strDelimit: set of delimiter characters.
// context   : caller-owned scan position, written on every call.
//
// Returns the next token (the input is modified in place), or NULL when no
// token is left. If context is NULL the call falls back to plain strtok().
char *DLL_PREFIX my_strtok_s(char *strToken, const char *strDelimit, char **context)
{
	if(strDelimit == NULL) {
		return NULL;
	}
	if(context == NULL) {
		// No place to keep state: behave like the previous strtok() wrapper.
		return strtok(strToken, strDelimit);
	}
	char *cursor = (strToken != NULL) ? strToken : *context;
	if(cursor == NULL) {
		return NULL;
	}
	// Skip delimiters in front of the token.
	cursor += strspn(cursor, strDelimit);
	if(*cursor == '\0') {
		*context = cursor;
		return NULL;
	}
	char *token = cursor;
	// Terminate the token at the next delimiter and resume right after it.
	cursor += strcspn(cursor, strDelimit);
	if(*cursor != '\0') {
		*cursor = '\0';
		cursor++;
	}
	*context = cursor;
	return token;
}
273
// Replacement for _tcstok_s(): splits a string into tokens, keeping the scan
// position in *context so that independent tokenisations can be interleaved.
//
// strToken  : string to tokenise on the first call, NULL to continue.
// strDelimit: set of delimiter characters.
// context   : caller-owned scan position, written on every call.
//
// Returns the next token (the input is modified in place), or NULL when no
// token is left. If context is NULL the call falls back to _tcstok().
_TCHAR *DLL_PREFIX my_tcstok_s(_TCHAR *strToken, const char *strDelimit, _TCHAR **context)
{
	if(strDelimit == NULL) {
		return NULL;
	}
	if(context == NULL) {
		// No place to keep state: behave like the previous _tcstok() wrapper.
		return _tcstok(strToken, strDelimit);
	}
	_TCHAR *cursor = (strToken != NULL) ? strToken : *context;
	if(cursor == NULL) {
		return NULL;
	}
	// Skip delimiters in front of the token.
	while(*cursor != '\0') {
		const char *d = strDelimit;
		while(*d != '\0' && (_TCHAR)(*d) != *cursor) {
			d++;
		}
		if(*d == '\0') {
			break; // current element is not a delimiter
		}
		cursor++;
	}
	if(*cursor == '\0') {
		*context = cursor;
		return NULL;
	}
	_TCHAR *token = cursor;
	// Terminate the token at the next delimiter and resume right after it.
	while(*cursor != '\0') {
		const char *d = strDelimit;
		while(*d != '\0' && (_TCHAR)(*d) != *cursor) {
			d++;
		}
		if(*d != '\0') {
			*cursor = '\0';
			cursor++;
			break;
		}
		cursor++;
	}
	*context = cursor;
	return token;
}
278
// Stand-in for sprintf_s(): formats into buffer through vsnprintf(), writing
// at most sizeOfBuffer bytes. Returns whatever vsnprintf() returns.
int DLL_PREFIX my_sprintf_s(char *buffer, size_t sizeOfBuffer, const char *format, ...)
{
	va_list args;
	va_start(args, format);
	const int written = vsnprintf(buffer, sizeOfBuffer, format, args);
	va_end(args);
	return written;
}
287
// Stand-in for swprintf_s(): formats wide characters into buffer through
// vswprintf() with sizeOfBuffer as its limit. Returns whatever vswprintf() returns.
int DLL_PREFIX my_swprintf_s(wchar_t *buffer, size_t sizeOfBuffer, const wchar_t *format, ...)
{
	va_list args;
	va_start(args, format);
	const int written = vswprintf(buffer, sizeOfBuffer, format, args);
	va_end(args);
	return written;
}
296
// Stand-in for _stprintf_s(): formats into buffer through vsnprintf(), writing
// at most sizeOfBuffer bytes. Returns whatever vsnprintf() returns.
int DLL_PREFIX my_stprintf_s(_TCHAR *buffer, size_t sizeOfBuffer, const _TCHAR *format, ...)
{
	va_list args;
	va_start(args, format);
	const int written = vsnprintf(buffer, sizeOfBuffer, format, args);
	va_end(args);
	return written;
}
305
// Stand-in for vsprintf_s(): forwards to vsnprintf() with the buffer capacity
// expressed in bytes. Returns whatever vsnprintf() returns.
int DLL_PREFIX my_vsprintf_s(char *buffer, size_t numberOfElements, const char *format, va_list argptr)
{
	const size_t capacity_in_bytes = numberOfElements * sizeof(char);
	return vsnprintf(buffer, capacity_in_bytes, format, argptr);
}
310
// Stand-in for _vstprintf_s(): forwards to vsnprintf() with the buffer capacity
// expressed in bytes. Returns whatever vsnprintf() returns.
int DLL_PREFIX my_vstprintf_s(_TCHAR *buffer, size_t numberOfElements, const _TCHAR *format, va_list argptr)
{
	const size_t capacity_in_bytes = numberOfElements * sizeof(_TCHAR);
	return vsnprintf(buffer, capacity_in_bytes, format, argptr);
}
315 #endif
316
317 //#ifdef USE_FAST_MEMCPY
318
// Thin wrapper around memcpy(): copies len bytes from src to dst and returns
// the pointer memcpy() returns.
void DLL_PREFIX *my_memcpy(void *dst, void *src, size_t len)
{
	void *const copied_to = memcpy(dst, src, len);
	return copied_to;
}
323 //#endif
324
325
326 #ifndef _WIN32
327 BOOL DLL_PREFIX MyWritePrivateProfileString(LPCTSTR lpAppName, LPCTSTR lpKeyName, LPCTSTR lpString, LPCTSTR lpFileName)
328 {
329         BOOL result = FALSE;
330         FILEIO* fio_i = new FILEIO();
331         if(fio_i->Fopen(lpFileName, FILEIO_READ_ASCII)) {
332                 char tmp_path[_MAX_PATH];
333                 my_sprintf_s(tmp_path, _MAX_PATH, "%s.$$$", lpFileName);
334                 FILEIO* fio_o = new FILEIO();
335                 if(fio_o->Fopen(tmp_path, FILEIO_WRITE_ASCII)) {
336                         bool in_section = false;
337                         char section[1024], line[1024], *equal;
338                         my_sprintf_s(section, 1024, "[%s]", lpAppName);
339                         while(fio_i->Fgets(line, 1024) != NULL && strlen(line) > 0) {
340                                 if(line[strlen(line) - 1] == '\n') {
341                                         line[strlen(line) - 1] = '\0';
342                                 }
343                                 if(!result) {
344                                         if(line[0] == '[') {
345                                                 if(in_section) {
346                                                         fio_o->Fprintf("%s=%s\n", lpKeyName, lpString);
347                                                         result = TRUE;
348                                                 } else if(strcmp(line, section) == 0) {
349                                                         in_section = true;
350                                                 }
351                                         } else if(in_section && (equal = strstr(line, "=")) != NULL) {
352                                                 *equal = '\0';
353                                                 if(strcmp(line, lpKeyName) == 0) {
354                                                         fio_o->Fprintf("%s=%s\n", lpKeyName, lpString);
355                                                         result = TRUE;
356                                                         continue;
357                                                 }
358                                                 *equal = '=';
359                                         }
360                                 }
361                                 fio_o->Fprintf("%s\n", line);
362                         }
363                         if(!result) {
364                                 if(!in_section) {
365                                         fio_o->Fprintf("[%s]\n", lpAppName);
366                                 }
367                                 fio_o->Fprintf("%s=%s\n", lpKeyName, lpString);
368                                 result = TRUE;
369                         }
370                         fio_o->Fclose();
371                 }
372                 delete fio_o;
373                 fio_i->Fclose();
374                 if(result) {
375                         if(!(FILEIO::RemoveFile(lpFileName) && FILEIO::RenameFile(tmp_path, lpFileName))) {
376                                 result = FALSE;
377                         }
378                 }
379         } else {
380                 FILEIO* fio_o = new FILEIO();
381                 if(fio_o->Fopen(lpFileName, FILEIO_WRITE_ASCII)) {
382                         fio_o->Fprintf("[%s]\n", lpAppName);
383                         fio_o->Fprintf("%s=%s\n", lpKeyName, lpString);
384                         fio_o->Fclose();
385                 }
386                 delete fio_o;
387         }
388         delete fio_i;
389         return result;
390 }
391
392
393 DWORD DLL_PREFIX MyGetPrivateProfileString(LPCTSTR lpAppName, LPCTSTR lpKeyName, LPCTSTR lpDefault, LPTSTR lpReturnedString, DWORD nSize, LPCTSTR lpFileName)
394 {
395         _TCHAR *lpp = (_TCHAR *)lpReturnedString;
396         if(lpDefault != NULL) {
397                 my_strcpy_s(lpp, nSize, lpDefault);
398         } else {
399                 lpp[0] = '\0';
400         }
401         FILEIO* fio = new FILEIO();
402         if(!(fio->IsFileExisting(lpFileName))) return 0;
403         if(fio->Fopen(lpFileName, FILEIO_READ_ASCII)) {
404                 bool in_section = false;
405                 char section[1024], line[1024], *equal;
406                 my_sprintf_s(section, 1024, "[%s]", lpAppName);
407                 while(fio->Fgets(line, 1024) != NULL && strlen(line) > 0) {
408                         if(line[strlen(line) - 1] == '\n') {
409                                 line[strlen(line) - 1] = '\0';
410                         }
411                         if(line[0] == '[') {
412                                 if(in_section) {
413                                         break;
414                                 } else if(strcmp(line, section) == 0) {
415                                         in_section = true;
416                                 }
417                         } else if(in_section && (equal = strstr(line, "=")) != NULL) {
418                                 *equal = '\0';
419                                 if(strcmp(line, lpKeyName) == 0) {
420                                         my_strcpy_s(lpp, nSize, equal + 1);
421                                         break;
422                                 }
423                         }
424                 }
425                 fio->Fclose();
426         }
427         delete fio;
428         //csp_logger->debug_log(CSP_LOG_DEBUG, CSP_LOG_TYPE_GENERAL, "Try App: %s Key: %s", lpAppName, lpKeyName);
429         return strlen(lpp);
430 }
431
432 UINT DLL_PREFIX MyGetPrivateProfileInt(LPCTSTR lpAppName, LPCTSTR lpKeyName, INT nDefault, LPCTSTR lpFileName)
433 {
434         int i;
435         char sstr[128];
436         char sval[128];
437         std::string s;
438         memset(sstr, 0x00, sizeof(sstr));
439         memset(sval, 0x00, sizeof(sval));
440         snprintf(sval, 128, "%d", nDefault); 
441         MyGetPrivateProfileString(lpAppName,lpKeyName, sval, sstr, 128, lpFileName);
442         s = sstr;
443         
444         if(s.empty()) {
445                 i = nDefault;
446         } else {
447                 i = strtol(s.c_str(), NULL, 10);
448         }
449         //csp_logger->debug_log(CSP_LOG_DEBUG, CSP_LOG_TYPE_GENERAL, "Got Int: %d\n", i);
450         return i;
451 }
452 #endif
453
454 #if defined(_RGB555)
// Packs 8-bit R/G/B components (0..255) into a 15-bit pixel:
// each component is scaled to 0..31, red in bits 10-14, green in bits 5-9,
// blue in bits 0-4.
scrntype_t DLL_PREFIX RGB_COLOR(uint32_t r, uint32_t g, uint32_t b)
{
	const scrntype_t red5   = ((scrntype_t)r * 0x1f) / 0xff;
	const scrntype_t green5 = ((scrntype_t)g * 0x1f) / 0xff;
	const scrntype_t blue5  = ((scrntype_t)b * 0x1f) / 0xff;
	return (red5 << 10) | (green5 << 5) | blue5;
}
462
463 scrntype_t DLL_PREFIX RGBA_COLOR(uint32_t r, uint32_t g, uint b, uint32_t a)
464 {
465         return RGB_COLOR(r, g, b);
466 }
467
// Extracts the red component (bits 10-14) of a 15-bit pixel and scales it
// from 0..31 to 0..255.
uint8_t DLL_PREFIX R_OF_COLOR(scrntype_t c)
{
	const scrntype_t red5 = (c >> 10) & 0x1f;
	const scrntype_t red8 = (red5 * 0xff) / 0x1f;
	return (uint8_t)red8;
}
474
// Extracts the green component (bits 5-9) of a 15-bit pixel and scales it
// from 0..31 to 0..255.
uint8_t DLL_PREFIX G_OF_COLOR(scrntype_t c)
{
	const scrntype_t green5 = (c >> 5) & 0x1f;
	const scrntype_t green8 = (green5 * 0xff) / 0x1f;
	return (uint8_t)green8;
}
481
// Extracts the blue component (bits 0-4) of a 15-bit pixel and scales it
// from 0..31 to 0..255.
uint8_t DLL_PREFIX B_OF_COLOR(scrntype_t c)
{
	const scrntype_t blue5 = (c >> 0) & 0x1f;
	const scrntype_t blue8 = (blue5 * 0xff) / 0x1f;
	return (uint8_t)blue8;
}
488
// Alpha of a 15-bit pixel: always reported as 255, whatever the pixel value.
uint8_t DLL_PREFIX A_OF_COLOR(scrntype_t c)
{
	(void)c; // the pixel value is not consulted
	return 0xff;
}
493 #elif defined(_RGB565)
// Packs 8-bit R/G/B components (0..255) into a 16-bit 5-6-5 pixel:
// red scaled to 0..31 in bits 11-15, green scaled to 0..63 in bits 5-10,
// blue scaled to 0..31 in bits 0-4.
scrntype_t DLL_PREFIX RGB_COLOR(uint32_t r, uint32_t g, uint32_t b)
{
	const scrntype_t red5   = ((scrntype_t)r * 0x1f) / 0xff;
	const scrntype_t green6 = ((scrntype_t)g * 0x3f) / 0xff;
	const scrntype_t blue5  = ((scrntype_t)b * 0x1f) / 0xff;
	return (red5 << 11) | (green6 << 5) | blue5;
}
501
502 scrntype_t DLL_PREFIX RGBA_COLOR(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
503 {
504         return RGB_COLOR(r, g, b);
505 }
506
// Extracts the red component (bits 11-15) of a 5-6-5 pixel and scales it
// from 0..31 to 0..255.
uint8_t DLL_PREFIX R_OF_COLOR(scrntype_t c)
{
	const scrntype_t red5 = (c >> 11) & 0x1f;
	const scrntype_t red8 = (red5 * 0xff) / 0x1f;
	return (uint8_t)red8;
}
513
// Extracts the green component (bits 5-10) of a 5-6-5 pixel and scales it
// from 0..63 to 0..255.
uint8_t DLL_PREFIX G_OF_COLOR(scrntype_t c)
{
	const scrntype_t green6 = (c >> 5) & 0x3f;
	const scrntype_t green8 = (green6 * 0xff) / 0x3f;
	return (uint8_t)green8;
}
520
// Extracts the blue component (bits 0-4) of a 5-6-5 pixel and scales it
// from 0..31 to 0..255.
uint8_t DLL_PREFIX B_OF_COLOR(scrntype_t c)
{
	const scrntype_t blue5 = (c >> 0) & 0x1f;
	const scrntype_t blue8 = (blue5 * 0xff) / 0x1f;
	return (uint8_t)blue8;
}
527
// Alpha of a 5-6-5 pixel: always reported as 255, whatever the pixel value.
uint8_t DLL_PREFIX A_OF_COLOR(scrntype_t c)
{
	(void)c; // the pixel value is not consulted
	return 0xff; // Alpha = 255
}
532 #endif
533
534 // Note: table strongly recommend to be aligned by sizeof(uint16_vec8_t).
535 // This is sizeof(uint16) * 8, some compilers may require to align 16bytes(128)
536 // when using SIMD128 -- 20181105 K.O
537 void DLL_PREFIX PrepareBitTransTableUint16(_bit_trans_table_t *tbl, uint16_t on_val, uint16_t off_val)
538 {
539         if(tbl == NULL) return;
540         for(uint16_t i = 0; i < 256; i++) {
541                 uint16_t n = i;
542                 for(int j = 0; j < 8; j++) {
543                         tbl->plane_table[i].w[j] = ((n & 0x80) == 0) ? off_val : on_val;
544                         n <<= 1;
545                 }
546         }
547 }
548
549 // Note: table strongly recommend to be aligned by sizeof(scrntype_vec8_t).
550 // This is sizeof(uint16) * 8, some compilers may require to align 32bytes(256) or 16bytes(128)
551 // when using SIMD256 or SIMD128 -- 20181105 K.O
552 void DLL_PREFIX PrepareBitTransTableScrnType(_bit_trans_table_scrn_t *tbl, scrntype_t on_val, scrntype_t off_val)
553 {
554         if(tbl == NULL) return;
555         for(uint16_t i = 0; i < 256; i++) {
556                 uint16_t n = i;
557                 for(int j = 0; j < 8; j++) {
558                         tbl->plane_table[i].w[j] = ((n & 0x80) == 0) ? off_val : on_val;
559                         n <<= 1;
560                 }
561         }
562 }
563
564 // Prepare reverse byte-order table(s).
565 void DLL_PREFIX PrepareReverseBitTransTableUint16(_bit_trans_table_t *tbl, uint16_t on_val, uint16_t off_val)
566 {
567         if(tbl == NULL) return;
568         for(uint16_t i = 0; i < 256; i++) {
569                 uint16_t n = i;
570                 for(int j = 0; j < 8; j++) {
571                         tbl->plane_table[i].w[j] = ((n & 0x01) == 0) ? off_val : on_val;
572                         n >>= 1;
573                 }
574         }
575 }
576
577 void DLL_PREFIX PrepareReverseBitTransTableScrnType(_bit_trans_table_scrn_t *tbl, scrntype_t on_val, scrntype_t off_val)
578 {
579         if(tbl == NULL) return;
580         for(uint16_t i = 0; i < 256; i++) {
581                 uint16_t n = i;
582                 for(int j = 0; j < 8; j++) {
583                         tbl->plane_table[i].w[j] = ((n & 0x01) == 0) ? off_val : on_val;
584                         n >>= 1;
585                 }
586         }
587 }
588
589 // With _bit_trans_table_scrn_t.
590 void DLL_PREFIX ConvertByteToPackedPixelByColorTable2(uint8_t *src, scrntype_t* dst, int bytes, _bit_trans_table_scrn_t *tbl, scrntype_t *on_color_table, scrntype_t* off_color_table)
591 {
592         
593     __DECL_ALIGNED(32) scrntype_vec8_t tmpd;
594         __DECL_ALIGNED(32) scrntype_vec8_t tmpdd;
595         __DECL_ALIGNED(32) scrntype_vec8_t colors;
596         scrntype_vec8_t* vt = (scrntype_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(scrntype_vec8_t));
597         
598         uintptr_t disalign = (uintptr_t)dst;
599         disalign = disalign & (sizeof(scrntype_vec8_t) - 1); //Is align by 128bits or 256bytes?
600         if(disalign == 0) {
601                 // Yes.
602                 scrntype_vec8_t *vdst = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
603 __DECL_VECTORIZED_LOOP
604                 for(int i = 0; i < bytes; i++) {
605                         tmpd.v = vt[src[i]].v;
606                         tmpdd.v = ~tmpd.v;
607                         
608 __DECL_VECTORIZED_LOOP
609                         for(int j = 0; j < 8; j++) {
610                                 colors.w[j] = on_color_table[j];
611                         }
612                         tmpd.v = tmpd.v & colors.v;
613 __DECL_VECTORIZED_LOOP
614                         for(int j = 0; j < 8; j++) {
615                                 colors.w[j] = off_color_table[j];
616                         }
617                         tmpdd.v = tmpdd.v & colors.v;
618                         vdst->v = (tmpd.v | tmpdd.v);
619                         off_color_table += 8;
620                         on_color_table += 8;
621                         vdst++;
622                 }
623         } else {
624                 // Sorry, not aligned.
625 __DECL_VECTORIZED_LOOP
626                 for(int i = 0; i < bytes; i++) {
627                         tmpd.v = vt[src[i]].v;
628                         tmpdd.v = ~tmpd.v;
629                         
630 __DECL_VECTORIZED_LOOP
631                         for(int j = 0; j < 8; j++) {
632                                 colors.w[j] = on_color_table[j];
633                         }
634                         tmpd.v = tmpd.v & colors.v;
635 __DECL_VECTORIZED_LOOP
636                         for(int j = 0; j < 8; j++) {
637                                 colors.w[j] = off_color_table[j];
638                         }
639                         tmpdd.v = tmpdd.v & colors.v;
640                         tmpdd.v = tmpdd.v | tmpd.v;
641 __DECL_VECTORIZED_LOOP
642                         for(int j = 0; j < 8; j++) {
643                                 dst[j] = tmpdd.w[j];
644                         }
645                         off_color_table += 8;
646                         on_color_table += 8;
647                         dst += 8;
648                 }
649         }
650 }
651
652
653 // Convert uint8_t[] ed VRAM to uint16_t[] mono pixel pattern.
654 // You must set table to "ON_VALUE" : "OFF_VALUE" via PrepareBitTransTableUint16().
655 // -- 20181105 K.O
656 void DLL_PREFIX ConvertByteToSparceUint16(uint8_t *src, uint16_t* dst, int bytes, _bit_trans_table_t *tbl, uint16_t mask)
657 {
658         
659         __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
660         uint16_vec8_t*  vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
661
662         __DECL_ALIGNED(16) uint16_vec8_t __masks;
663
664 __DECL_VECTORIZED_LOOP
665         for(int i = 0; i < 8; i++) {
666                 __masks.w[i] = mask;
667         }
668         uintptr_t disalign = (uintptr_t)dst;
669         disalign = disalign & 0x0f; //Is align by 128bits?
670         if(disalign == 0) {
671                 // Yes.
672                 uint16_vec8_t *vdst = (uint16_vec8_t*)__builtin_assume_aligned(dst, sizeof(uint16_vec8_t));
673 __DECL_VECTORIZED_LOOP
674                 for(int i = 0; i < bytes; i++) {
675                         tmpd.v = vt[src[i]].v;
676                         tmpd.v = tmpd.v & __masks.v;
677                         vdst->v = tmpd.v;
678                         vdst++;
679                 }
680         } else {
681                 // Sorry, not aligned.
682 __DECL_VECTORIZED_LOOP
683                 for(int i = 0; i < bytes; i++) {
684                         tmpd.v = vt[src[i]].v;
685                         tmpd.v = tmpd.v & __masks.v;
686 __DECL_VECTORIZED_LOOP
687                         for(int j = 0; j < 8; j++) {
688                                 dst[j] = tmpd.w[j];
689                         }
690                         dst += 8;
691                 }
692         }
693 }
694
695 // Convert uint8_t[] ed VRAM to uint8_t[] mono pixel pattern.
696 // You must set table to "ON_VALUE" : "OFF_VALUE" via PrepareBitTransTableUint16().
697 // -- 20181105 K.O
698 void DLL_PREFIX ConvertByteToSparceUint8(uint8_t *src, uint16_t* dst, int bytes, _bit_trans_table_t *tbl, uint16_t mask)
699 {
700         
701         __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
702         uint16_vec8_t*  vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
703
704         __DECL_ALIGNED(16) uint16_vec8_t __masks;
705         __DECL_ALIGNED(16) uint8_vec8_t tmpdd;
706
707 __DECL_VECTORIZED_LOOP
708         for(int i = 0; i < 8; i++) {
709                 __masks.w[i] = mask;
710         }
711         uintptr_t disalign = (uintptr_t)dst;
712         disalign = disalign & 0x07; //Is align by 128bits?
713         if(disalign == 0) {
714                 // Yes.
715                 uint8_vec8_t *vdst = (uint8_vec8_t*)__builtin_assume_aligned(dst, sizeof(uint8_vec8_t));
716 __DECL_VECTORIZED_LOOP
717                 for(int i = 0; i < bytes; i++) {
718                         tmpd.v = vt[src[i]].v;
719                         tmpd.v = tmpd.v & __masks.v;
720 __DECL_VECTORIZED_LOOP
721                         for(int j = 0; j < 8; j++) {
722                                 tmpdd.w[j] = (uint8_t)(tmpd.w[j]);
723                         }
724                         vdst->v = tmpdd.v;
725                         vdst++;
726                 }
727         } else {
728                 // Sorry, not aligned.
729 __DECL_VECTORIZED_LOOP
730                 for(int i = 0; i < bytes; i++) {
731                         tmpd.v = vt[src[i]].v;
732                         tmpd.v = tmpd.v & __masks.v;
733 __DECL_VECTORIZED_LOOP
734                         for(int j = 0; j < 8; j++) {
735                                 dst[j] = (uint8_t)(tmpd.w[j]);
736                         }
737                         dst += 8;
738                 }
739         }
740 }
741
742
743 void DLL_PREFIX ConvertByteToPackedPixelByColorTable(uint8_t *src, scrntype_t* dst, int bytes, _bit_trans_table_t *tbl, scrntype_t *on_color_table, scrntype_t* off_color_table)
744 {
745         
746         __DECL_ALIGNED(16) uint16_vec8_t   tmpd;
747         __DECL_ALIGNED(32) scrntype_vec8_t tmpdd;
748         uint16_vec8_t*  vt = (uint16_vec8_t*)__builtin_assume_aligned(&(tbl->plane_table[0]), sizeof(uint16_vec8_t));
749         
750         uintptr_t disalign = (uintptr_t)dst;
751         disalign = disalign & 0x0f; //Is align by 128bits?
752         if(disalign == 0) {
753                 // Yes.
754                 scrntype_vec8_t *vdst = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
755 __DECL_VECTORIZED_LOOP
756                 for(int i = 0; i < bytes; i++) {
757                         tmpd.v = vt[src[i]].v;
758 __DECL_VECTORIZED_LOOP
759                         for(int j = 0; j < 8; j++) {
760                                 tmpdd.w[j] = (tmpd.w[j] == 0) ? off_color_table[j] : on_color_table[j];
761                         }
762                         vdst->v = tmpdd.v;
763                         off_color_table += 8;
764                         on_color_table += 8;
765                         vdst++;
766                 }
767         } else {
768                 // Sorry, not aligned.
769 __DECL_VECTORIZED_LOOP
770                 for(int i = 0; i < bytes; i++) {
771                         tmpd.v = vt[src[i]].v;
772 __DECL_VECTORIZED_LOOP
773                         for(int j = 0; j < 8; j++) {
774                                 dst[j] = (tmpd.w[j] == 0) ? off_color_table[j] : on_color_table[j];
775                         }
776                         off_color_table += 8;
777                         on_color_table += 8;
778                         dst += 8;
779                 }
780         }
781 }
782
783
784 void DLL_PREFIX Render8Colors_Line(_render_command_data_t *src, scrntype_t *dst, scrntype_t* dst2, bool scan_line)
785 {
786         if(src == NULL) return;
787         if(dst == NULL) return;
788
789 //__DECL_VECTORIZED_LOOP
790 //      for(int i = 0; i < 3; i++) {
791 //              if(src->bit_trans_table[i] == NULL) return;
792 //              if(src->data[i] == NULL) return;
793 //      }
794         scrntype_t dummy_palette[8]; // fallback
795         scrntype_t *palette = src->palette;
796         
797         uint16_vec8_t *vpb = (uint16_vec8_t*)__builtin_assume_aligned(src->bit_trans_table[0], sizeof(uint16_vec8_t));
798         uint16_vec8_t *vpr = (uint16_vec8_t*)__builtin_assume_aligned(src->bit_trans_table[1], sizeof(uint16_vec8_t));
799         uint16_vec8_t *vpg = (uint16_vec8_t*)__builtin_assume_aligned(src->bit_trans_table[2], sizeof(uint16_vec8_t));
800
801         uint32_t x;
802         __DECL_ALIGNED(16) uint32_t offset[4] = {0};
803         __DECL_ALIGNED(16) uint32_t beginaddr[4] = {0};
804         uint32_t mask = src->addrmask;
805         uint32_t offsetmask = src->addrmask2;
806 __DECL_VECTORIZED_LOOP
807         for(int i = 0; i < 3; i++) {
808                 offset[i] = src->voffset[i];
809         }
810         if(palette == NULL) {
811 __DECL_VECTORIZED_LOOP
812                 for(int i = 0; i < 8; i++) {
813                         dummy_palette[i] = RGB_COLOR(((i & 2) << 5) | 0x1f,
814                                                                                  ((i & 4) << 5) | 0x1f,
815                                                                                  ((i & 1) << 5) | 0x1f);
816                 }
817                 palette = dummy_palette;
818         }
819         uint8_t *bp = &(src->data[0][src->baseaddress[0]]);
820         uint8_t *rp = &(src->data[1][src->baseaddress[1]]);
821         uint8_t *gp = &(src->data[2][src->baseaddress[2]]);
822         
823         uint8_t r, g, b;
824         int shift = src->shift;
825         const bool is_render[3] = { src->is_render[0], src->is_render[1],  src->is_render[2] };
826         __DECL_ALIGNED(16) uint16_vec8_t tmpd;
827         __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; 
828         scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
829         
830         x = src->begin_pos;
831         uint32_t n = x;
832         if(dst2 == NULL) {
833         __DECL_VECTORIZED_LOOP
834                 for(uint32_t xx = 0; xx < src->render_width; xx++) {
835                         b = (is_render[0]) ? bp[(offset[0] + n) & mask] : 0;
836                         r = (is_render[1]) ? rp[(offset[1] + n) & mask] : 0;
837                         g = (is_render[2]) ? gp[(offset[2] + n) & mask] : 0;
838                         tmpd.v = vpb[b].v;
839                         tmpd.v = tmpd.v | vpr[r].v;
840                         tmpd.v = tmpd.v | vpg[g].v;
841                         tmpd.v = tmpd.v >> shift;
842                         n = (n + 1) & offsetmask;
843         __DECL_VECTORIZED_LOOP
844                         for(int i = 0; i < 8; i++) {
845                                 tmp_dd.w[i] = palette[tmpd.w[i]];
846                         }
847                         vdp[xx].v = tmp_dd.v;
848                 }
849         } else {
850 #if defined(_RGB555) || defined(_RGBA565)
851                 static const int shift_factor = 2;
852 #else // 24bit
853                 static const int shift_factor = 3;
854 #endif
855                 __DECL_ALIGNED(32) scrntype_vec8_t sline;
856                 scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t));
857         __DECL_VECTORIZED_LOOP
858                 for(int i = 0; i < 8; i++) {
859                         sline.w[i] = (scrntype_t)RGBA_COLOR(31, 31, 31, 255);
860                 }
861         __DECL_VECTORIZED_LOOP
862                 for(uint32_t xx = 0; xx < src->render_width; xx++) {
863                         b = (is_render[0]) ? bp[(offset[0] + n) & mask] : 0;
864                         r = (is_render[1]) ? rp[(offset[1] + n) & mask] : 0;
865                         g = (is_render[2]) ? gp[(offset[2] + n) & mask] : 0;
866                         tmpd.v = vpb[b].v;
867                         tmpd.v = tmpd.v | vpr[r].v;
868                         tmpd.v = tmpd.v | vpg[g].v;
869                         tmpd.v = tmpd.v >> shift;
870                         n = (n + 1) & offsetmask;
871         __DECL_VECTORIZED_LOOP
872                         for(int i = 0; i < 8; i++) {
873                                 tmp_dd.w[i] = palette[tmpd.w[i]];
874                         }
875                         vdp[xx].v = tmp_dd.v;
876                         if(scan_line) {
877                                 tmp_dd.v = tmp_dd.v >> shift_factor;
878                                 tmp_dd.v = tmp_dd.v & sline.v;
879                         }
880                         vdp2[xx].v = tmp_dd.v;
881                 }
882         }
883 }
884
885 void DLL_PREFIX Render16Colors_Line(_render_command_data_t *src, scrntype_t *dst, scrntype_t* dst2, bool scan_line)
886 {
887         if(src == NULL) return;
888         if(dst == NULL) return;
889
890 //__DECL_VECTORIZED_LOOP
891 //      for(int i = 0; i < 3; i++) {
892 //              if(src->bit_trans_table[i] == NULL) return;
893 //              if(src->data[i] == NULL) return;
894 //      }
895         scrntype_t dummy_palette[16]; // fallback
896         scrntype_t *palette = src->palette;
897         
898         uint16_vec8_t *vpb = (uint16_vec8_t*)__builtin_assume_aligned(src->bit_trans_table[0], sizeof(uint16_vec8_t));
899         uint16_vec8_t *vpr = (uint16_vec8_t*)__builtin_assume_aligned(src->bit_trans_table[1], sizeof(uint16_vec8_t));
900         uint16_vec8_t *vpg = (uint16_vec8_t*)__builtin_assume_aligned(src->bit_trans_table[2], sizeof(uint16_vec8_t));
901         uint16_vec8_t *vpn = (uint16_vec8_t*)__builtin_assume_aligned(src->bit_trans_table[3], sizeof(uint16_vec8_t));
902
903         uint32_t x;
904         __DECL_ALIGNED(16) uint32_t offset[4];
905         __DECL_ALIGNED(16) uint32_t beginaddr[4];
906         uint32_t mask = src->addrmask;
907         uint32_t offsetmask = src->addrmask2;
908         
909 __DECL_VECTORIZED_LOOP
910         for(int i = 0; i < 4; i++) {
911                 offset[i] = src->voffset[i];
912         }
913         if(palette == NULL) {
914 __DECL_VECTORIZED_LOOP
915                 for(int i = 0; i < 16; i++) {
916                         dummy_palette[i] = RGB_COLOR((((i & 2) + (i & 8)) << 4) | 0x0f,
917                                                                                  (((i & 4) + (i & 8)) << 4) | 0x0f,
918                                                                                  (((i & 1) + (i & 8)) << 4) | 0x0f);
919                 }
920                 palette = dummy_palette;
921         }
922         uint8_t *bp = &(src->data[0][src->baseaddress[0]]);
923         uint8_t *rp = &(src->data[1][src->baseaddress[1]]);
924         uint8_t *gp = &(src->data[2][src->baseaddress[2]]);
925         uint8_t *np = &(src->data[3][src->baseaddress[3]]);
926         
927         uint8_t r, g, b, n;
928         int shift = src->shift;
929         const bool is_render[4] = { src->is_render[0], src->is_render[1],  src->is_render[2], src->is_render[3] };
930         __DECL_ALIGNED(16) uint16_vec8_t tmpd;
931         __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; 
932         scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
933         
934         x = src->begin_pos;
935         uint32_t xn = x;
936         if(dst2 == NULL) {      
937         __DECL_VECTORIZED_LOOP
938                 for(uint32_t xx = 0; xx < src->render_width; xx++) {
939                         b = (is_render[0]) ? bp[(offset[0] + xn) & mask] : 0;
940                         r = (is_render[1]) ? rp[(offset[1] + xn) & mask] : 0;
941                         g = (is_render[2]) ? gp[(offset[2] + xn) & mask] : 0;
942                         n = (is_render[3]) ? np[(offset[3] + xn) & mask] : 0;
943                         tmpd.v = vpb[b].v;
944                         tmpd.v = tmpd.v | vpr[r].v;
945                         tmpd.v = tmpd.v | vpg[g].v;
946                         tmpd.v = tmpd.v | vpn[n].v;
947                         tmpd.v = tmpd.v >> shift;
948                         xn = (xn + 1) & offsetmask;
949         __DECL_VECTORIZED_LOOP
950                         for(int i = 0; i < 8; i++) {
951                                 tmp_dd.w[i] = palette[tmpd.w[i]];
952                         }
953                         vdp[xx].v = tmp_dd.v;
954                 }
955         } else {
956 #if defined(_RGB555) || defined(_RGBA565)
957                 static const int shift_factor = 2;
958 #else // 24bit
959                 static const int shift_factor = 3;
960 #endif
961                 __DECL_ALIGNED(32) scrntype_vec8_t sline;
962                 scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t));
963         __DECL_VECTORIZED_LOOP
964                 for(int i = 0; i < 8; i++) {
965                         sline.w[i] = (scrntype_t)RGBA_COLOR(31, 31, 31, 255);
966                 }
967         __DECL_VECTORIZED_LOOP
968                 for(uint32_t xx = 0; xx < src->render_width; xx++) {
969                         b = (is_render[0]) ? bp[(offset[0] + xn) & mask] : 0;
970                         r = (is_render[1]) ? rp[(offset[1] + xn) & mask] : 0;
971                         g = (is_render[2]) ? gp[(offset[2] + xn) & mask] : 0;
972                         n = (is_render[3]) ? np[(offset[3] + xn) & mask] : 0;
973                         tmpd.v = vpb[b].v;
974                         tmpd.v = tmpd.v | vpr[r].v;
975                         tmpd.v = tmpd.v | vpg[g].v;
976                         tmpd.v = tmpd.v | vpn[n].v;
977                         tmpd.v = tmpd.v >> shift;
978                         xn = (xn + 1) & offsetmask;
979         __DECL_VECTORIZED_LOOP
980                         for(int i = 0; i < 8; i++) {
981                                 tmp_dd.w[i] = palette[tmpd.w[i]];
982                         }
983                         vdp[xx].v = tmp_dd.v;
984                         if(scan_line) {
985                                 tmp_dd.v = tmp_dd.v >> shift_factor;
986                                 tmp_dd.v = tmp_dd.v & sline.v;
987                         }
988                         vdp2[xx].v = tmp_dd.v;
989                 }
990         }
991 }
992
993 // src->palette Must be 2^planes entries.
994 void DLL_PREFIX Render2NColors_Line(_render_command_data_t *src, scrntype_t *dst, scrntype_t* dst2, bool scan_line, int planes)
995 {
996         if(src == NULL) return;
997         if(dst == NULL) return;
998         if(src->palette == NULL) return;
999         if(planes <= 0) return;
1000         if(planes >= 16) planes = 16;
1001 //__DECL_VECTORIZED_LOOP
1002 //      for(int i = 0; i < 3; i++) {
1003 //              if(src->bit_trans_table[i] == NULL) return;
1004 //              if(src->data[i] == NULL) return;
1005 //      }
1006         scrntype_t *palette = src->palette;
1007         
1008         uint16_vec8_t* vp[16];
1009         for(int i = 0; i < planes; i++) {
1010                 vp[i] = (uint16_vec8_t*)__builtin_assume_aligned(src->bit_trans_table[i], sizeof(uint16_vec8_t));
1011         }
1012
1013         uint32_t x;
1014         __DECL_ALIGNED(16) uint32_t offset[16];
1015         __DECL_ALIGNED(16) uint32_t beginaddr[16];
1016         uint32_t mask = src->addrmask;
1017         uint32_t offsetmask = src->addrmask2;
1018 __DECL_VECTORIZED_LOOP
1019         for(int i = 0; i < planes; i++) {
1020                 offset[i] = src->voffset[i];
1021         }
1022         uint8_t *pp[16];
1023         for(int i = 0; i < planes; i++) {
1024                 pp[i] = &(src->data[i][src->baseaddress[i]]);
1025         }
1026         
1027         uint8_t d[16];
1028         int shift = src->shift;
1029         const bool is_render[4] = { src->is_render[0], src->is_render[1],  src->is_render[2], src->is_render[3] };
1030         __DECL_ALIGNED(16) uint16_vec8_t tmpd;
1031         __DECL_ALIGNED(32) scrntype_vec8_t tmp_dd; 
1032         scrntype_vec8_t* vdp = (scrntype_vec8_t*)__builtin_assume_aligned(dst, sizeof(scrntype_vec8_t));
1033         
1034         x = src->begin_pos;
1035         if(dst2 == NULL) {
1036                 uint32_t n = x;
1037         __DECL_VECTORIZED_LOOP
1038                 for(uint32_t xx = 0; xx < src->render_width; xx++) {
1039                         d[0] = (is_render[0]) ? pp[0][(offset[0] + n) & mask] : 0;
1040                         tmpd.v = vp[0][d[0]].v;
1041         __DECL_VECTORIZED_LOOP
1042                         for(int i = 1; i < planes; i++) {
1043                                 d[i] = (is_render[i]) ? pp[i][(offset[i] + n) & mask] : 0;
1044                                 tmpd.v = tmpd.v | vp[i][d[i]].v;
1045                         }
1046                         n = (n + 1) & offsetmask;
1047                         tmpd.v = tmpd.v >> shift;
1048         __DECL_VECTORIZED_LOOP
1049                         for(int i = 0; i < 8; i++) {
1050                                 tmp_dd.w[i] = palette[tmpd.w[i]];
1051                         }
1052                         vdp[xx].v = tmp_dd.v;
1053                 }
1054         } else {
1055 #if defined(_RGB555) || defined(_RGBA565)
1056                 static const int shift_factor = 2;
1057 #else // 24bit
1058                 static const int shift_factor = 3;
1059 #endif
1060                 __DECL_ALIGNED(32) scrntype_vec8_t sline;
1061                 scrntype_vec8_t* vdp2 = (scrntype_vec8_t*)__builtin_assume_aligned(dst2, sizeof(scrntype_vec8_t));
1062         __DECL_VECTORIZED_LOOP
1063                 for(int i = 0; i < 8; i++) {
1064                         sline.w[i] = (scrntype_t)RGBA_COLOR(31, 31, 31, 255);
1065                 }
1066                 uint32_t n = x;
1067         __DECL_VECTORIZED_LOOP
1068                 for(uint32_t xx = 0; xx < src->render_width; xx++) {
1069                         d[0] = (is_render[0]) ? pp[0][(offset[0] + n) & mask] : 0;
1070                         tmpd.v = vp[0][d[0]].v;
1071         __DECL_VECTORIZED_LOOP
1072                         for(int i = 1; i < planes; i++) {
1073                                 d[i] = (is_render[i]) ? pp[i][(offset[i] + n) & mask] : 0;
1074                                 tmpd.v = tmpd.v | vp[i][d[i]].v;
1075                         }
1076                         n = (n + 1) & offsetmask;
1077                         tmpd.v = tmpd.v >> shift;
1078         __DECL_VECTORIZED_LOOP
1079                         for(int i = 0; i < 8; i++) {
1080                                 tmp_dd.w[i] = palette[tmpd.w[i]];
1081                         }
1082                         vdp[xx].v = tmp_dd.v;
1083                         if(scan_line) {
1084                                 tmp_dd.v = tmp_dd.v >> shift_factor;
1085                                 tmp_dd.v = tmp_dd.v & sline.v;
1086                         }
1087                         vdp2[xx].v = tmp_dd.v;
1088                 }
1089         }
1090 }
1091
1092 void DLL_PREFIX Convert2NColorsToByte_Line(_render_command_data_t *src, uint8_t *dst, int planes)
1093 {
1094         if(planes >= 8) planes = 8;
1095         if(planes <= 0) return;
1096
1097         uint8_t* srcp[8];
1098         __DECL_ALIGNED(32) uint32_t offset[8] = {0};
1099         __DECL_ALIGNED(16) uint16_vec8_t dat;
1100         uint16_vec8_t* bp[8] ;
1101                 
1102 __DECL_VECTORIZED_LOOP
1103                 for(int i = 0; i < planes; i++) {
1104                 bp[i] = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[i]->plane_table[0]), sizeof(uint16_vec8_t));
1105                 srcp[i] = &(src->data[i][src->baseaddress[i]]);
1106         }
1107         uint32_t addrmask = src->addrmask;
1108         uint32_t offsetmask = src->addrmask2;
1109         int shift = src->shift;
1110         
1111 __DECL_VECTORIZED_LOOP
1112         for(int i = 0; i < planes; i++) {
1113                 offset[i] = src->voffset[i];
1114         }
1115
1116         uint32_t noffset = src->begin_pos & offsetmask;
1117         uint8_t td[16];
1118 __DECL_VECTORIZED_LOOP
1119         for(int x = 0; x < src->render_width; x++) {
1120 __DECL_VECTORIZED_LOOP
1121                 for(int i = 0; i < planes; i++) {
1122                         td[i] = srcp[i][(noffset + offset[i]) & addrmask];
1123                 }
1124                 noffset = (noffset + 1) & offsetmask;
1125                 dat.v = bp[0][td[0]].v;
1126 __DECL_VECTORIZED_LOOP
1127                 for(int i = 1; i < planes; i++) {
1128                         dat.v = dat.v | bp[i][td[i]].v;
1129                 }
1130                 dat.v = dat.v >> shift;
1131 __DECL_VECTORIZED_LOOP
1132                 for(int i = 0; i < 8; i++) {
1133                         dst[i] = (uint8_t)(dat.w[i]);
1134                 }
1135                 dst += 8;
1136                 
1137         }
1138 }
1139
1140 void DLL_PREFIX Convert2NColorsToByte_LineZoom2(_render_command_data_t *src, uint8_t *dst, int planes)
1141 {
1142         if(planes >= 8) planes = 8;
1143         if(planes <= 0) return;
1144
1145         uint8_t* srcp[8];
1146         __DECL_ALIGNED(32) uint32_t offset[8] = {0};
1147         __DECL_ALIGNED(16) uint16_vec8_t dat;
1148         uint16_vec8_t* bp[8] ;
1149                 
1150 __DECL_VECTORIZED_LOOP
1151                 for(int i = 0; i < planes; i++) {
1152                 bp[i] = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[i]->plane_table[0]), sizeof(uint16_vec8_t));
1153                 srcp[i] = &(src->data[i][src->baseaddress[i]]);
1154         }
1155         uint32_t addrmask = src->addrmask;
1156         uint32_t offsetmask = src->addrmask2;
1157         int shift = src->shift;
1158         
1159 __DECL_VECTORIZED_LOOP
1160         for(int i = 0; i < planes; i++) {
1161                 offset[i] = src->voffset[i];
1162         }
1163
1164         uint32_t noffset = src->begin_pos & offsetmask;
1165         uint8_t td[16];
1166 __DECL_VECTORIZED_LOOP
1167         for(int x = 0; x < src->render_width; x++) {
1168 __DECL_VECTORIZED_LOOP
1169                 for(int i = 0; i < planes; i++) {
1170                         td[i] = srcp[i][(noffset + offset[i]) & addrmask];
1171                 }
1172                 noffset = (noffset + 1) & offsetmask;
1173                 dat.v = bp[0][td[0]].v;
1174 __DECL_VECTORIZED_LOOP
1175                 for(int i = 1; i < planes; i++) {
1176                         dat.v = dat.v | bp[i][td[i]].v;
1177                 }
1178                 dat.v = dat.v >> shift;
1179 __DECL_VECTORIZED_LOOP
1180         for(int i = 0, j = 0; i < 16; i +=2, j++) {
1181                         dst[i]     = (uint8_t)(dat.w[j]);
1182                         dst[i + 1] = (uint8_t)(dat.w[j]);
1183                 }
1184                 dst += 16;
1185         }
1186 }
1187
1188 void DLL_PREFIX Convert8ColorsToByte_Line(_render_command_data_t *src, uint8_t *dst)
1189 {
1190         uint8_t *bp = &(src->data[0][src->baseaddress[0]]);
1191         uint8_t *rp = &(src->data[1][src->baseaddress[1]]);
1192         uint8_t *gp = &(src->data[2][src->baseaddress[2]]);
1193         __DECL_ALIGNED(16) uint32_t offset[4] = {0};
1194
1195         __DECL_ALIGNED(16) uint16_vec8_t rdat;
1196         __DECL_ALIGNED(16) uint16_vec8_t gdat;
1197         __DECL_ALIGNED(16) uint16_vec8_t bdat;
1198         __DECL_ALIGNED(16) uint16_vec8_t tmpd;
1199
1200         uint16_vec8_t* bpb = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[0]->plane_table[0]), sizeof(uint16_vec8_t));
1201         uint16_vec8_t* bpr = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[1]->plane_table[0]), sizeof(uint16_vec8_t));
1202         uint16_vec8_t* bpg = (uint16_vec8_t*)__builtin_assume_aligned(&(src->bit_trans_table[2]->plane_table[0]), sizeof(uint16_vec8_t));
1203         
1204         uint32_t addrmask = src->addrmask;
1205         uint32_t offsetmask = src->addrmask2;
1206         int shift = src->shift;
1207         
1208 __DECL_VECTORIZED_LOOP
1209         for(int i = 0; i < 3; i++) {
1210                 offset[i] = src->voffset[i];
1211         }
1212
1213         uint32_t noffset = src->begin_pos & offsetmask;
1214         uint8_t b, r, g;
1215 __DECL_VECTORIZED_LOOP
1216         for(int x = 0; x < src->render_width; x++) {
1217                 b = bp[(noffset + offset[0]) & addrmask];
1218                 r = rp[(noffset + offset[1]) & addrmask];
1219                 g = gp[(noffset + offset[2]) & addrmask];
1220
1221                 noffset = (noffset + 1) & offsetmask;
1222                 
1223                 bdat.v = bpb[b].v;
1224                 rdat.v = bpr[r].v;
1225                 gdat.v = bpg[g].v;
1226                 tmpd.v = bdat.v;
1227                 tmpd.v = tmpd.v | rdat.v;
1228                 tmpd.v = tmpd.v | gdat.v;
1229                 tmpd.v = tmpd.v >> shift;
1230
1231 __DECL_VECTORIZED_LOOP
1232                 for(int i = 0; i < 8; i++) {
1233                         dst[i] = (uint8_t)(tmpd.w[i]);
1234                 }
1235                 dst += 8;
1236         }
1237 }
1238         
1239
#ifndef _MSC_VER
// Function object for std::transform(): upper-cases a single char.
// Refer from documentation of libstdc++, GCC5.
struct to_upper {
	char operator() (char c) const {
		// std::toupper() requires a value representable as unsigned char (or EOF);
		// a negative char (non-ASCII byte) must be converted first.
		return static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
	}
};
#endif
1245
#if defined(_USE_QT)
// Create directory t_dir when stat() cannot find it.
//
// Any stat() failure is handled as "does not exist". The directory is created
// through QDir::mkdir() on QDir::current(); the result of mkdir() is not
// checked (best effort).
static void _my_mkdir(std::string t_dir)
{
	struct stat st;
	if(stat(t_dir.c_str(), &st) != 0) {
		QDir dir = QDir::current();
		dir.mkdir(QString::fromStdString(t_dir));
	}
}
#endif
1267
1268 const _TCHAR *DLL_PREFIX get_application_path()
1269 {
1270         static _TCHAR app_path[_MAX_PATH];
1271         static bool initialized = false;
1272         
1273         if(!initialized) {
1274 #if defined(_WIN32) && !defined(_USE_QT)
1275                 _TCHAR tmp_path[_MAX_PATH], *ptr = NULL;
1276                 if(GetModuleFileName(NULL, tmp_path, _MAX_PATH) != 0 && GetFullPathName(tmp_path, _MAX_PATH, app_path, &ptr) != 0 && ptr != NULL) {
1277                         *ptr = _T('\0');
1278                 } else {
1279                         my_tcscpy_s(app_path, _MAX_PATH, _T(".\\"));
1280                 }
1281 #else
1282 #if defined(Q_OS_WIN)
1283                 std::string delim = "\\";
1284 #else
1285                 std::string delim = "/";
1286 #endif
1287                 std::string csppath = cpp_homedir + "CommonSourceCodeProject" + delim ;
1288                 _my_mkdir(csppath);
1289            
1290                 std::string cpath = csppath + my_procname + delim;
1291                 _my_mkdir(cpath);
1292                 strncpy(app_path, cpath.c_str(), _MAX_PATH - 1);
1293 #endif
1294                 initialized = true;
1295         }
1296         return (const _TCHAR *)app_path;
1297 }
1298
1299 const _TCHAR *DLL_PREFIX get_initial_current_path()
1300 {
1301         static _TCHAR current_path[_MAX_PATH];
1302         static bool initialized = false;
1303         
1304         if(!initialized) {
1305 #if defined(_WIN32) && !defined(_USE_QT)
1306                 GetCurrentDirectoryA(_MAX_PATH, current_path);
1307 #else
1308                 getcwd(current_path, _MAX_PATH);
1309 #endif
1310                 int len = strlen(current_path);
1311                 if(current_path[len - 1] != '\\' && current_path[len - 1] != '/') {
1312 #if defined(_WIN32) || defined(Q_OS_WIN)
1313                         current_path[len] = '\\';
1314 #else
1315                         current_path[len] = '/';
1316 #endif
1317                         current_path[len + 1] = '\0';
1318                 }
1319
1320                 initialized = true;
1321         }
1322         return (const _TCHAR *)current_path;
1323 }
1324
1325 const _TCHAR *DLL_PREFIX create_local_path(const _TCHAR *format, ...)
1326 {
1327         static _TCHAR file_path[8][_MAX_PATH];
1328         static unsigned int table_index = 0;
1329         unsigned int output_index = (table_index++) & 7;
1330         _TCHAR file_name[_MAX_PATH];
1331         //printf("%d %d\n", table_index, output_index);
1332         va_list ap;
1333         
1334         va_start(ap, format);
1335         my_vstprintf_s(file_name, _MAX_PATH, format, ap);
1336         va_end(ap);
1337         my_stprintf_s(file_path[output_index], _MAX_PATH, _T("%s%s"), get_application_path(), file_name);
1338         return (const _TCHAR *)file_path[output_index];
1339 }
1340
1341 void DLL_PREFIX create_local_path(_TCHAR *file_path, int length, const _TCHAR *format, ...)
1342 {
1343         _TCHAR file_name[_MAX_PATH];
1344         va_list ap;
1345         
1346         va_start(ap, format);
1347         my_vstprintf_s(file_name, _MAX_PATH, format, ap);
1348         va_end(ap);
1349         my_stprintf_s(file_path, length, _T("%s%s"), get_application_path(), file_name);
1350 }
1351
1352 bool DLL_PREFIX is_absolute_path(const _TCHAR *file_path)
1353 {
1354 #ifdef _WIN32
1355         if(_tcslen(file_path) > 2 && ((file_path[0] >= _T('A') && file_path[0] <= _T('Z')) || (file_path[0] >= _T('a') && file_path[0] <= _T('z'))) && file_path[1] == _T(':')) {
1356                 return true;
1357         }
1358 #endif
1359         return (_tcslen(file_path) > 1 && (file_path[0] == _T('/') || file_path[0] == _T('\\')));
1360 }
1361
1362 const _TCHAR *DLL_PREFIX create_date_file_path(const _TCHAR *extension)
1363 {
1364         cur_time_t cur_time;
1365         
1366         get_host_time(&cur_time);
1367         return create_local_path(_T("%d-%0.2d-%0.2d_%0.2d-%0.2d-%0.2d.%s"), cur_time.year, cur_time.month, cur_time.day, cur_time.hour, cur_time.minute, cur_time.second, extension);
1368 }
1369
1370 void DLL_PREFIX create_date_file_path(_TCHAR *file_path, int length, const _TCHAR *extension)
1371 {
1372         my_tcscpy_s(file_path, length, create_date_file_path(extension));
1373 }
1374
1375 const _TCHAR *DLL_PREFIX create_date_file_name(const _TCHAR *extension)
1376 {
1377         static _TCHAR file_name[8][_MAX_PATH];
1378         static unsigned int table_index = 0;
1379         unsigned int output_index = (table_index++) & 7;
1380         cur_time_t cur_time;
1381         
1382         get_host_time(&cur_time);
1383         my_stprintf_s(file_name[output_index], _MAX_PATH, _T("%d-%0.2d-%0.2d_%0.2d-%0.2d-%0.2d.%s"), cur_time.year, cur_time.month, cur_time.day, cur_time.hour, cur_time.minute, cur_time.second, extension);
1384         return (const _TCHAR *)file_name[output_index];
1385 }
1386
1387 void DLL_PREFIX create_date_file_name(_TCHAR *file_path, int length, const _TCHAR *extension)
1388 {
1389         my_tcscpy_s(file_path, length, create_date_file_name(extension));
1390 }
1391
1392 bool DLL_PREFIX check_file_extension(const _TCHAR *file_path, const _TCHAR *ext)
1393 {
1394 #if defined(_USE_QT)
1395         std::string s_fpath = file_path;
1396         std::string s_ext = ext;
1397         //bool f = false;
1398         int pos;
1399         std::transform(s_fpath.begin(), s_fpath.end(), s_fpath.begin(), to_upper());
1400         std::transform(s_ext.begin(), s_ext.end(), s_ext.begin(), to_upper());
1401         if(s_fpath.length() < s_ext.length()) return false;
1402         pos = s_fpath.rfind(s_ext.c_str(), s_fpath.length());
1403         if((pos != (int)std::string::npos) && (pos >= ((int)s_fpath.length() - (int)s_ext.length()))) return true; 
1404         return false;
1405 #else
1406         int nam_len = _tcslen(file_path);
1407         int ext_len = _tcslen(ext);
1408         
1409         return (nam_len >= ext_len && _tcsncicmp(&file_path[nam_len - ext_len], ext, ext_len) == 0);
1410 #endif
1411 }
1412
1413 const _TCHAR *DLL_PREFIX get_file_path_without_extensiton(const _TCHAR *file_path)
1414 {
1415         static _TCHAR path[8][_MAX_PATH];
1416         static unsigned int table_index = 0;
1417         unsigned int output_index = (table_index++) & 7;
1418         
1419         my_tcscpy_s(path[output_index], _MAX_PATH, file_path);
1420 #if defined(_WIN32) && defined(_MSC_VER)
1421         PathRemoveExtension(path[output_index]);
1422 #elif defined(_USE_QT)
1423         QString delim;
1424         delim = QString::fromUtf8(".");
1425         QString tmp_path = QString::fromUtf8(file_path);
1426         int n = tmp_path.lastIndexOf(delim);
1427         if(n > 0) {
1428                 tmp_path = tmp_path.left(n);
1429         }
1430         //printf("%s\n", tmp_path.toUtf8().constData());
1431         memset(path[output_index], 0x00, sizeof(_TCHAR) * _MAX_PATH);
1432         strncpy(path[output_index], tmp_path.toUtf8().constData(), _MAX_PATH - 1);
1433 #else
1434 #endif
1435         return (const _TCHAR *)path[output_index];
1436 }
1437
1438 void DLL_PREFIX get_long_full_path_name(const _TCHAR* src, _TCHAR* dst, size_t dst_len)
1439 {
1440 #ifdef _WIN32
1441         _TCHAR tmp[_MAX_PATH];
1442         if(GetFullPathName(src, _MAX_PATH, tmp, NULL) == 0) {
1443                 my_tcscpy_s(dst, dst_len, src);
1444         } else if(GetLongPathName(tmp, dst, _MAX_PATH) == 0) {
1445                 my_tcscpy_s(dst, dst_len, tmp);
1446         }
1447 #elif defined(_USE_QT)
1448         QString tmp_path = QString::fromUtf8(src);
1449         QFileInfo info(tmp_path);
1450         my_tcscpy_s(dst, dst_len, info.absoluteFilePath().toLocal8Bit().constData());
1451 #else
1452         // write code for your environment
1453         
1454 #endif
1455 }
1456
1457 const _TCHAR *DLL_PREFIX get_parent_dir(const _TCHAR* file)
1458 {
1459         static _TCHAR path[8][_MAX_PATH];
1460         static unsigned int table_index = 0;
1461         unsigned int output_index = (table_index++) & 7;
1462         
1463 #ifdef _WIN32
1464         _TCHAR *ptr;
1465         GetFullPathName(file, _MAX_PATH, path[output_index], &ptr);
1466         if(ptr != NULL) {
1467                 *ptr = _T('\0');
1468         }
1469 #elif defined(_USE_QT)
1470         QString delim;
1471 #if defined(Q_OS_WIN)
1472         delim = QString::fromUtf8("\\");
1473 #else
1474         delim = QString::fromUtf8("/");
1475 #endif
1476         QString tmp_path = QString::fromUtf8(file);
1477         int n = tmp_path.lastIndexOf(delim);
1478         if(n > 0) {
1479                 tmp_path = tmp_path.left(n);
1480                 tmp_path.append(delim);
1481         }
1482         //printf("%s\n", tmp_path.toUtf8().constData());
1483         memset(path[output_index], 0x00, sizeof(_TCHAR) * _MAX_PATH);
1484         strncpy(path[output_index], tmp_path.toUtf8().constData(), _MAX_PATH - 1);
1485 #else
1486         // write code for your environment
1487 #endif
1488         return path[output_index];
1489 }
1490
1491 const wchar_t *DLL_PREFIX char_to_wchar(const char *cs)
1492 {
1493         // char to wchar_t
1494         static wchar_t ws[4096];
1495         
1496 #if defined(_WIN32) || defined(_USE_QT)
1497         mbstowcs(ws, cs, strlen(cs));
1498 #else
1499         // write code for your environment
1500 #endif
1501         return ws;
1502 }
1503
1504 const char *DLL_PREFIX wchar_to_char(const wchar_t *ws)
1505 {
1506         // wchar_t to char
1507         static char cs[4096];
1508         
1509 #ifdef _WIN32
1510         wcstombs(cs, ws, wcslen(ws));
1511 #elif defined(_USE_QT)
1512         wcstombs(cs, ws, wcslen(ws));
1513 #else
1514         // write code for your environment
1515 #endif
1516         return cs;
1517 }
1518
1519 const _TCHAR *DLL_PREFIX char_to_tchar(const char *cs)
1520 {
1521 #if defined(_UNICODE) && defined(SUPPORT_TCHAR_TYPE)
1522         // char to wchar_t
1523         return char_to_wchar(cs);
1524 #else
1525         // char to char
1526         return cs;
1527 #endif
1528 }
1529
1530 const char *DLL_PREFIX tchar_to_char(const _TCHAR *ts)
1531 {
1532 #if defined(_UNICODE) && defined(SUPPORT_TCHAR_TYPE)
1533         // wchar_t to char
1534         return wchar_to_char(ts);
1535 #else
1536         // char to char
1537         return ts;
1538 #endif
1539 }
1540
1541 const _TCHAR *DLL_PREFIX wchar_to_tchar(const wchar_t *ws)
1542 {
1543 #if defined(_UNICODE) && defined(SUPPORT_TCHAR_TYPE)
1544         // wchar_t to wchar_t
1545         return ws;
1546 #else
1547         // wchar_t to char
1548         return wchar_to_char(ws);
1549 #endif
1550 }
1551
1552 const wchar_t *DLL_PREFIX tchar_to_wchar(const _TCHAR *ts)
1553 {
1554 #if defined(_UNICODE) && defined(SUPPORT_TCHAR_TYPE)
1555         // wchar_t to wchar_t
1556         return ts;
1557 #else
1558         // char to wchar_t
1559         return char_to_wchar(ts);
1560 #endif
1561 }
1562
1563 const _TCHAR *DLL_PREFIX create_string(const _TCHAR* format, ...)
1564 {
1565         static _TCHAR buffer[8][1024];
1566         static unsigned int table_index = 0;
1567         unsigned int output_index = (table_index++) & 7;
1568         va_list ap;
1569         
1570         va_start(ap, format);
1571         my_vstprintf_s(buffer[output_index], 1024, format, ap);
1572         va_end(ap);
1573         return (const _TCHAR *)buffer[output_index];
1574 }
1575
1576 int32_t DLL_PREFIX muldiv_s32(int32_t nNumber, int32_t nNumerator, int32_t nDenominator)
1577 {
1578         try {
1579                 int64_t tmp;
1580                 tmp  = (int64_t)nNumber;
1581                 tmp *= (int64_t)nNumerator;
1582                 tmp /= (int64_t)nDenominator;
1583                 return (int32_t)tmp;
1584         } catch(...) {
1585                 double tmp;
1586                 tmp  = (double)nNumber;
1587                 tmp *= (double)nNumerator;
1588                 tmp /= (double)nDenominator;
1589                 if(tmp < 0) {
1590                         return (int32_t)(tmp - 0.5);
1591                 } else {
1592                         return (int32_t)(tmp + 0.5);
1593                 }
1594         }
1595 }
1596
1597 uint32_t DLL_PREFIX muldiv_u32(uint32_t nNumber, uint32_t nNumerator, uint32_t nDenominator)
1598 {
1599         try {
1600                 uint64_t tmp;
1601                 tmp  = (uint64_t)nNumber;
1602                 tmp *= (uint64_t)nNumerator;
1603                 tmp /= (uint64_t)nDenominator;
1604                 return (uint32_t)tmp;
1605         } catch(...) {
1606                 double tmp;
1607                 tmp  = (double)nNumber;
1608                 tmp *= (double)nNumerator;
1609                 tmp /= (double)nDenominator;
1610                 return (uint32_t)(tmp + 0.5);
1611         }
1612 }
1613
// Lookup table used by get_crc32(), and the flag telling that it was filled.
static bool _crc_initialized = false;
static uint32_t _crc_table[256] = {0};

// Fill _crc_table[]: every entry is its own index processed through 8
// shift-right steps, XOR-ing 0xedb88320 whenever a set bit is shifted out.
static void init_crc32_table(void)
{
	for(uint32_t index = 0; index < 256; index++) {
		uint32_t remainder = index;
		for(int step = 8; step > 0; step--) {
			const bool bit_out = ((remainder & 1) != 0);
			remainder >>= 1;
			if(bit_out) {
				remainder ^= 0xedb88320;
			}
		}
		_crc_table[index] = remainder;
	}
	_crc_initialized = true;
}
1631
1632 uint32_t DLL_PREFIX get_crc32(uint8_t data[], int size)
1633 {
1634         const uint32_t *table = (const uint32_t *)_crc_table;
1635         if(!_crc_initialized) {
1636                 init_crc32_table();
1637         }
1638         
1639         uint32_t c = ~0;
1640         for(int i = 0; i < size; i++) {
1641                 c = table[(c ^ data[i]) & 0xff] ^ (c >> 8);
1642         }
1643         return ~c;
1644 }
1645
// Bit-serial checksum over data[0 .. size-1], continuing from `seed`.
// With size == 0 the seed is returned unchanged, so a running value can be
// carried across several calls.
uint32_t DLL_PREFIX calc_crc32(uint32_t seed, uint8_t data[], int size)
{
#if 0
	// Disabled table-driven variant, kept for reference.
	if(!_crc_initialized) {
		init_crc32_table();
	}
	const uint32_t *table = (const uint32_t *)_crc_table;

	uint32_t c = ~seed;
	for(int i = 0; i < size; i++) {
		c = table[(c ^ data[i]) & 0xff] ^ (c >> 8);
	}
	return ~c;
#else
	// Calculate CRC32
	// Refer to : https://qiita.com/mikecat_mixc/items/e5d236e3a3803ef7d3c5
	static const uint32_t CRC_MAGIC_WORD = 0x04C11DB7;
	// Value XOR-ed into the register whenever a set bit is shifted out.
	const uint32_t feedback = (uint32_t)~CRC_MAGIC_WORD;

	uint32_t crc = seed;
	for(int pos = 0; pos < size; pos++) {
		uint8_t pending = data[pos];
		// Feed the byte in bit by bit, least significant bit first.
		for(int shift = 8; shift > 0; shift--) {
			const bool carry_out = ((crc & 0x1) != 0);
			crc >>= 1;
			if((pending & 0x01) != 0) {
				crc |= 0x80000000;
			}
			if(carry_out) {
				crc ^= feedback;
			}
			pending >>= 1;
		}
	}
	return crc;
#endif
}
1681
1682 uint16_t DLL_PREFIX jis_to_sjis(uint16_t jis)
1683 {
1684         pair32_t tmp;
1685         
1686         tmp.w.l = jis - 0x2121;
1687         if(tmp.w.l & 0x100) {
1688                 tmp.w.l += 0x9e;
1689         } else {
1690                 tmp.w.l += 0x40;
1691         }
1692         if(tmp.b.l > 0x7f) {
1693                 tmp.w.l += 0x01;
1694         }
1695         tmp.b.h = (tmp.b.h >> 1) + 0x81;
1696         if(tmp.w.l >= 0xa000) {
1697                 tmp.w.l += 0x4000;
1698         }
1699         return tmp.w.l;
1700 }
1701
// Convert a decibel step count to a linear gain where 1024 means unity.
// The result is rounded by adding 0.5 before truncation.
int DLL_PREFIX decibel_to_volume(int decibel)
{
	// +1 equals +0.5dB (same as fmgen)
	const double exponent = decibel / 40.0;
	const double scaled = 1024.0 * pow(10.0, exponent);
	return (int)(scaled + 0.5);
}
1707
// Scale `sample` by `volume`, where the product is shifted right by 10 bits
// (so a volume of 1024 leaves the sample unchanged).
//
// Negative samples are scaled on their magnitude and negated afterwards, so
// the result is rounded toward zero for both signs.
// The multiplication is carried out in 64 bits: a 32-bit product overflows
// for large sample/volume combinations (and -sample overflows for the most
// negative sample), which is undefined behaviour for signed integers.
// Results that do not fit into int32_t are saturated.
int32_t DLL_PREFIX apply_volume(int32_t sample, int volume)
{
	int64_t output;
	if(sample < 0) {
		output = -(int64_t)sample;
		output *= volume;
		output >>= 10;
		output = -output;
	} else {
		output = sample;
		output *= volume;
		output >>= 10;
	}
	if(output > 2147483647) {
		return 2147483647;
	} else if(output < (-2147483647 - 1)) {
		return (-2147483647 - 1);
	}
	return (int32_t)output;
}
1731
1732 void DLL_PREFIX get_host_time(cur_time_t* cur_time)
1733 {
1734 #ifdef _WIN32
1735         SYSTEMTIME sTime;
1736         GetLocalTime(&sTime);
1737         cur_time->year = sTime.wYear;
1738         cur_time->month = sTime.wMonth;
1739         cur_time->day = sTime.wDay;
1740         cur_time->day_of_week = sTime.wDayOfWeek;
1741         cur_time->hour = sTime.wHour;
1742         cur_time->minute = sTime.wMinute;
1743         cur_time->second = sTime.wSecond;
1744 #else
1745         time_t timer = time(NULL);
1746         struct tm *local = localtime(&timer);
1747         cur_time->year = local->tm_year + 1900;
1748         cur_time->month = local->tm_mon + 1;
1749         cur_time->day = local->tm_mday;
1750         cur_time->day_of_week = local->tm_wday;
1751         cur_time->hour = local->tm_hour;
1752         cur_time->minute = local->tm_min;
1753         cur_time->second = local->tm_sec;
1754 #endif
1755 }
1756
1757
1758
1759 void DLL_PREFIX cur_time_t::increment()
1760 {
1761         if(++second >= 60) {
1762                 second = 0;
1763                 if(++minute >= 60) {
1764                         minute = 0;
1765                         if(++hour >= 24) {
1766                                 hour = 0;
1767                                 // days in this month
1768                                 int days = 31;
1769                                 if(month == 2) {
1770                                         days = LEAP_YEAR(year) ? 29 : 28;
1771                                 } else if(month == 4 || month == 6 || month == 9 || month == 11) {
1772                                         days = 30;
1773                                 }
1774                                 if(++day > days) {
1775                                         day = 1;
1776                                         if(++month > 12) {
1777                                                 month = 1;
1778                                                 year++;
1779                                         }
1780                                 }
1781                                 if(++day_of_week >= 7) {
1782                                         day_of_week = 0;
1783                                 }
1784                         }
1785                 }
1786         }
1787 }
1788
1789 void DLL_PREFIX cur_time_t::update_year()
1790 {
1791         // 1970-2069
1792         if(year < 70) {
1793                 year += 2000;
1794         } else if(year < 100) {
1795                 year += 1900;
1796         }
1797 }
1798
1799 void DLL_PREFIX cur_time_t::update_day_of_week()
1800 {
1801         static const int t[] = {0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4};
1802         int y = year - (month < 3);
1803         day_of_week = (y + y / 4 - y / 100 + y / 400 + t[month - 1] + day) % 7;
1804 }
1805
1806 #define STATE_VERSION   1
1807
1808
1809 bool DLL_PREFIX cur_time_t::process_state(void *f, bool loading)
1810 {
1811         FILEIO *state_fio = (FILEIO *)f;
1812         
1813         if(!state_fio->StateCheckUint32(STATE_VERSION)) {
1814                 return false;
1815         }
1816         state_fio->StateValue(year);
1817         state_fio->StateValue(month);
1818         state_fio->StateValue(day);
1819         state_fio->StateValue(day_of_week);
1820         state_fio->StateValue(hour);
1821         state_fio->StateValue(minute);
1822         state_fio->StateValue(second);
1823         state_fio->StateValue(initialized);
1824         return true;
1825 }
1826
1827 const _TCHAR *DLL_PREFIX get_symbol(symbol_t *first_symbol, uint32_t addr)
1828 {
1829         static _TCHAR name[8][1024];
1830         static unsigned int table_index = 0;
1831         unsigned int output_index = (table_index++) & 7;
1832         
1833         if(first_symbol != NULL) {
1834                 for(symbol_t* symbol = first_symbol; symbol; symbol = symbol->next_symbol) {
1835                         if(symbol->addr == addr) {
1836                                 my_tcscpy_s(name[output_index], 1024, symbol->name);
1837                                 return name[output_index];
1838                         }
1839                 }
1840         }
1841         return NULL;
1842 }
1843
1844 const _TCHAR *DLL_PREFIX get_value_or_symbol(symbol_t *first_symbol, const _TCHAR *format, uint32_t addr)
1845 {
1846         static _TCHAR name[8][1024];
1847         static unsigned int table_index = 0;
1848         unsigned int output_index = (table_index++) & 7;
1849         
1850         if(first_symbol != NULL) {
1851                 for(symbol_t* symbol = first_symbol; symbol; symbol = symbol->next_symbol) {
1852                         if(symbol->addr == addr) {
1853                                 my_tcscpy_s(name[output_index], 1024, symbol->name);
1854                                 return name[output_index];
1855                         }
1856                 }
1857         }
1858         my_stprintf_s(name[output_index], 1024, format, addr);
1859         return name[output_index];
1860 }
1861
1862 const _TCHAR *DLL_PREFIX get_value_and_symbol(symbol_t *first_symbol, const _TCHAR *format, uint32_t addr)
1863 {
1864         static _TCHAR name[8][1024];
1865         static unsigned int table_index = 0;
1866         unsigned int output_index = (table_index++) & 7;
1867         
1868         my_stprintf_s(name[output_index], 1024, format, addr);
1869         
1870         if(first_symbol != NULL) {
1871                 for(symbol_t* symbol = first_symbol; symbol; symbol = symbol->next_symbol) {
1872                         if(symbol->addr == addr) {
1873                                 _TCHAR temp[1024];
1874 //                              my_stprintf_s(temp, 1024, _T(" (%s)"), symbol->name);
1875                                 my_stprintf_s(temp, 1024, _T(";%s"), symbol->name);
1876                                 my_tcscat_s(name[output_index], 1024, temp);
1877                                 return name[output_index];
1878                         }
1879                 }
1880         }
1881         return name[output_index];
1882 }
1883
1884 // Use this before writing wav_data.
1885 bool DLL_PREFIX write_dummy_wav_header(void *__fio)
1886 {
1887         if(__fio == NULL) return false;
1888
1889         FILEIO *fio = (FILEIO *)__fio;
1890         uint8_t dummy[sizeof(wav_header_t) + sizeof(wav_chunk_t)];
1891
1892         if(!fio->IsOpened()) return false;
1893         
1894         memset(dummy, 0, sizeof(dummy));
1895         fio->Fwrite(dummy, sizeof(dummy), 1);
1896         return true;
1897 }
1898 // Use this after writing wav_data.
1899 bool DLL_PREFIX set_wav_header(wav_header_t *header, wav_chunk_t *first_chunk, uint16_t channels, uint32_t rate,
1900                                                            uint16_t bits, size_t file_length)
1901 {
1902         uint32_t length = (uint32_t) file_length;
1903         
1904         if(header == NULL) return false;
1905         if(first_chunk == NULL) return false;
1906
1907         pair32_t __riff_chunk_size;
1908         pair32_t __fmt_chunk_size;
1909         pair32_t __wav_chunk_size;
1910         pair16_t __fmt_id;
1911         pair16_t __channels;
1912         pair32_t __sample_rate;
1913         pair32_t __data_speed;
1914         pair16_t __block_size;
1915         pair16_t __sample_bits;
1916
1917         __riff_chunk_size.d = length - 8;
1918         __fmt_chunk_size.d = 16;
1919         __fmt_id.u16 = 1;
1920         __channels.u16 = channels;
1921         __sample_rate.d = rate;
1922         __block_size.u16 = (uint16_t)((channels * bits) / 8);
1923         __sample_bits.u16 = bits;
1924         __data_speed.d = rate * (uint32_t)(__block_size.u16);
1925
1926         memcpy(&(header->riff_chunk.id), "RIFF", 4);
1927         header->riff_chunk.size = __riff_chunk_size.get_4bytes_le_to();
1928         
1929         memcpy(&(header->wave), "WAVE", 4);
1930         memcpy(&(header->fmt_chunk.id), "fmt ", 4);
1931         header->fmt_chunk.size = __fmt_chunk_size.get_4bytes_le_to();
1932         header->format_id = __fmt_id.get_2bytes_le_to();
1933         header->channels = __channels.get_2bytes_le_to();
1934         header->sample_rate = __sample_rate.get_4bytes_le_to();
1935         header->data_speed =  __data_speed.get_4bytes_le_to();
1936         header->block_size = __block_size.get_2bytes_le_to();
1937         header->sample_bits = __sample_bits.get_2bytes_le_to();
1938
1939         memcpy(&(first_chunk->id), "data", 4);
1940         __wav_chunk_size.d = length - sizeof(wav_header_t) - sizeof(wav_chunk_t);
1941         first_chunk->size = __wav_chunk_size.get_4bytes_le_to();
1942
1943         return true;
1944 }
1945 // Note: buffers are allocated by this, You should free() within user class.
1946 bool DLL_PREFIX load_wav_to_stereo(void *__fio, int16_t **left_buf, int16_t **right_buf, uint32_t *rate, int *got_samples)
1947 {
1948
1949         if(__fio == NULL) return false;
1950         if(left_buf == NULL) return false;
1951         if(right_buf == NULL) return false;
1952         if(rate == NULL) return false;
1953         if(got_samples == NULL) return false;
1954         //if((bits != 8) && (bits != 16) && (bits != 32)) return false;
1955
1956         FILEIO *fio = (FILEIO *)__fio;
1957         if(!fio->IsOpened()) return false;
1958
1959         
1960         int16_t *left_buffer = NULL;
1961         int16_t *right_buffer = NULL;
1962         size_t samples = 0;
1963         uint32_t sample_rate = 0;
1964         
1965         wav_header_t header;
1966         wav_chunk_t  chunk;
1967
1968         pair16_t __fmt_id;
1969         pair16_t __sample_bits;
1970         pair16_t __channels;
1971         pair32_t __sample_rate;
1972         pair32_t __chunk_size;
1973
1974         fio->Fread(&header, sizeof(header), 1);
1975         __fmt_id.set_2bytes_le_from(header.format_id);
1976         __sample_bits.set_2bytes_le_from(header.sample_bits);
1977         __chunk_size.set_4bytes_le_from(header.fmt_chunk.size);
1978         __channels.set_2bytes_le_from(header.channels);
1979         __sample_rate.set_4bytes_le_from(header.sample_rate);
1980
1981         if((__fmt_id.u16 == 1) && ((__sample_bits.u16 == 8) || (__sample_bits.u16 == 16) || (__sample_bits.u16 == 32))) {
1982                 fio->Fseek(__chunk_size.d - 16, FILEIO_SEEK_CUR);
1983                 bool is_eof = false;
1984                 while(1) {
1985                         if(fio->Fread(&chunk, sizeof(chunk), 1) != 1) {
1986                                 is_eof = true;
1987                                 break;
1988                         }
1989                         __chunk_size.set_4bytes_le_from(chunk.size);
1990                         if(strncmp(chunk.id, "data", 4) == 0) {
1991                                 break;
1992                         }
1993                         fio->Fseek(__chunk_size.d, FILEIO_SEEK_CUR);
1994                 }
1995                 __chunk_size.set_4bytes_le_from(chunk.size);
1996                 if(is_eof) {
1997                         fio->Fclose();
1998                         delete fio;
1999                         return false;
2000                 }
2001                 
2002                 samples = (size_t)(__chunk_size.d / __channels.u16);
2003                 int16_t data_l, data_r;
2004                 union {
2005                         int16_t s16;
2006                         struct {
2007                                 uint8_t l, h;
2008                         } b;
2009                 } pair16;
2010                 union {
2011                         int32_t s32;
2012                         struct {
2013                                 uint8_t l, h, h2, h3;
2014                         } b;
2015                 } pair32;
2016                 
2017                 if(samples > 0) {
2018                         if(__sample_bits.u16 == 16) {
2019                                 samples /= 2;
2020                         } else if(__sample_bits.u16 == 32) {
2021                                 samples /= 4;
2022                         }
2023                         if(samples == 0) return false;
2024                         sample_rate = __sample_rate.d;
2025
2026                         left_buffer = (int16_t *)malloc(samples * sizeof(int16_t));
2027                         right_buffer = (int16_t *)malloc(samples * sizeof(int16_t));
2028                         if(left_buffer == NULL) {
2029                                 if(right_buffer != NULL) free(right_buffer);
2030                                 return false;
2031                         }
2032                         if(right_buffer == NULL) {
2033                                 if(left_buffer != NULL) free(left_buffer);
2034                                 return false;
2035                         }
2036                         switch(__sample_bits.u16) {
2037                         case 8:
2038                                 if(__channels.s16 == 1) {
2039                                         for(int i = 0; i < samples; i++) {
2040                                                 data_l = (int16_t)(fio->FgetUint8());
2041                                                 data_l = (data_l - 128) * 256;
2042                                                 left_buffer[i] = data_l;
2043                                                 right_buffer[i] = data_l;
2044                                         }
2045                                 } else if(__channels.s16 == 2) {
2046                                         for(int i = 0; i < samples; i++) {
2047                                                 data_l = (int16_t)(fio->FgetUint8());
2048                                                 data_l = (data_l - 128) * 256;
2049                                                 data_r = (int16_t)(fio->FgetUint8());
2050                                                 data_r = (data_r - 128) * 256;
2051                                                 left_buffer[i] = data_l;
2052                                                 right_buffer[i] = data_r;
2053                                         }
2054                                 }
2055                                 break;
2056                         case 16:
2057                                 if(__channels.s16 == 1) {
2058                                         for(int i = 0; i < samples; i++) {
2059                                                 pair16.b.l = fio->FgetUint8();
2060                                                 pair16.b.h = fio->FgetUint8();
2061                                                 data_l = pair16.s16;
2062                                                 
2063                                                 left_buffer[i] = data_l;
2064                                                 right_buffer[i] = data_l;
2065                                         }
2066                                 } else if(__channels.s16 == 2) {
2067                                         for(int i = 0; i < samples; i++) {
2068                                                 pair16.b.l = fio->FgetUint8();
2069                                                 pair16.b.h = fio->FgetUint8();
2070                                                 data_l = pair16.s16;
2071                                                 
2072                                                 pair16.b.l = fio->FgetUint8();
2073                                                 pair16.b.h = fio->FgetUint8();
2074                                                 data_r = pair16.s16;
2075                                                 left_buffer[i] = data_l;
2076                                                 right_buffer[i] = data_r;
2077                                         }
2078                                 }
2079                                 break;
2080                         case 32:
2081                                 if(__channels.s16 == 1) {
2082                                         for(int i = 0; i < samples; i++) {
2083                                                 pair32.b.l = fio->FgetUint8();
2084                                                 pair32.b.h = fio->FgetUint8();
2085                                                 pair32.b.h2 = fio->FgetUint8();
2086                                                 pair32.b.h3 = fio->FgetUint8();
2087                                                 data_l = (int16_t)(pair32.s32 / 65536);
2088                                                 
2089                                                 left_buffer[i] = data_l;
2090                                                 right_buffer[i] = data_l;
2091                                         }
2092                                 } else if(__channels.s16 == 2) {
2093                                         for(int i = 0; i < samples; i++) {
2094                                                 pair32.b.l = fio->FgetUint8();
2095                                                 pair32.b.h = fio->FgetUint8();
2096                                                 pair32.b.h2 = fio->FgetUint8();
2097                                                 pair32.b.h3 = fio->FgetUint8();
2098                                                 data_l = (int16_t)(pair32.s32 / 65536);
2099                                                 
2100                                                 pair32.b.l = fio->FgetUint8();
2101                                                 pair32.b.h = fio->FgetUint8();
2102                                                 pair32.b.h2 = fio->FgetUint8();
2103                                                 pair32.b.h3 = fio->FgetUint8();
2104                                                 data_r = (int16_t)(pair32.s32 / 65536);
2105                                                 
2106                                                 left_buffer[i] = data_l;
2107                                                 right_buffer[i] = data_r;
2108                                         }
2109                                 }
2110                                 break;
2111                         default:
2112                                 break;
2113                         }
2114                 }
2115         } else {
2116                 return false;
2117         }
2118         *left_buf = left_buffer;
2119         *right_buf = right_buffer;
2120         *rate = sample_rate;
2121         *got_samples = (int)samples;
2122         return true;
2123 }
2124
2125 bool DLL_PREFIX load_wav_to_monoral(void *__fio, int16_t **buffer, uint32_t *rate, int *got_samples)
2126 {
2127
2128         if(__fio == NULL) return false;
2129         if(buffer == NULL) return false;
2130         if(rate == NULL) return false;
2131         if(got_samples == NULL) return false;
2132         //if((bits != 8) && (bits != 16) && (bits != 32)) return false;
2133
2134         FILEIO *fio = (FILEIO *)__fio;
2135         if(!fio->IsOpened()) return false;
2136
2137         
2138         int16_t *left_buffer = NULL;
2139         size_t samples = 0;
2140         uint32_t sample_rate = 0;
2141         
2142         wav_header_t header;
2143         wav_chunk_t  chunk;
2144
2145         pair16_t __fmt_id;
2146         pair16_t __sample_bits;
2147         pair16_t __channels;
2148         pair32_t __sample_rate;
2149         pair32_t __chunk_size;
2150
2151         fio->Fread(&header, sizeof(header), 1);
2152         __fmt_id.set_2bytes_le_from(header.format_id);
2153         __sample_bits.set_2bytes_le_from(header.sample_bits);
2154         __chunk_size.set_4bytes_le_from(header.fmt_chunk.size);
2155         __channels.set_2bytes_le_from(header.channels);
2156         __sample_rate.set_4bytes_le_from(header.sample_rate);
2157
2158         if((__fmt_id.u16 == 1) && ((__sample_bits.u16 == 8) || (__sample_bits.u16 == 16) || (__sample_bits.u16 == 32))) {
2159                 fio->Fseek(__chunk_size.d - 16, FILEIO_SEEK_CUR);
2160                 bool is_eof = false;
2161                 while(1) {
2162                         if(fio->Fread(&chunk, sizeof(chunk), 1) != 1) {
2163                                 is_eof = true;
2164                                 break;
2165                         }
2166                         __chunk_size.set_4bytes_le_from(chunk.size);
2167                         if(strncmp(chunk.id, "data", 4) == 0) {
2168                                 break;
2169                         }
2170                         fio->Fseek(__chunk_size.d, FILEIO_SEEK_CUR);
2171                 }
2172                 __chunk_size.set_4bytes_le_from(chunk.size);
2173                 if(is_eof) {
2174                         fio->Fclose();
2175                         delete fio;
2176                         return false;
2177                 }
2178                 
2179                 samples = (size_t)(__chunk_size.d / __channels.u16);
2180                 int16_t data_l, data_r;
2181                 int32_t data32_l, data32_r;
2182                 union {
2183                         int16_t s16;
2184                         struct {
2185                                 uint8_t l, h;
2186                         } b;
2187                 } pair16;
2188                 union {
2189                         int32_t s32;
2190                         struct {
2191                                 uint8_t l, h, h2, h3;
2192                         } b;
2193                 } pair32;
2194                 
2195                 if(samples > 0) {
2196                         if(__sample_bits.u16 == 16) {
2197                                 samples /= 2;
2198                         } else if(__sample_bits.u16 == 32) {
2199                                 samples /= 4;
2200                         }
2201                         if(samples == 0) return false;
2202                         sample_rate = __sample_rate.d;
2203
2204                         left_buffer = (int16_t *)malloc(samples * sizeof(int16_t));
2205                         if(left_buffer == NULL) {
2206                                 return false;
2207                         }
2208                         switch(__sample_bits.u16) {
2209                         case 8:
2210                                 if(__channels.s16 == 1) {
2211                                         for(int i = 0; i < samples; i++) {
2212                                                 data_l = (int16_t)(fio->FgetUint8());
2213                                                 data_l = (data_l - 128) * 256;
2214                                                 left_buffer[i] = data_l;
2215                                         }
2216                                 } else if(__channels.s16 == 2) {
2217                                         for(int i = 0; i < samples; i++) {
2218                                                 data_l = (int16_t)(fio->FgetUint8());
2219                                                 data_l = (data_l - 128) * 256;
2220                                                 data_r = (int16_t)(fio->FgetUint8());
2221                                                 data_r = (data_r - 128) * 256;
2222                                                 left_buffer[i] = (data_l + data_r) / 2;
2223                                         }
2224                                 }
2225                                 break;
2226                         case 16:
2227                                 if(__channels.s16 == 1) {
2228                                         for(int i = 0; i < samples; i++) {
2229                                                 pair16.b.l = fio->FgetUint8();
2230                                                 pair16.b.h = fio->FgetUint8();
2231                                                 data_l = pair16.s16;
2232                                                 
2233                                                 left_buffer[i] = data_l;
2234                                         }
2235                                 } else if(__channels.s16 == 2) {
2236                                         for(int i = 0; i < samples; i++) {
2237                                                 pair16.b.l = fio->FgetUint8();
2238                                                 pair16.b.h = fio->FgetUint8();
2239                                                 data_l = pair16.s16;
2240                                                 
2241                                                 pair16.b.l = fio->FgetUint8();
2242                                                 pair16.b.h = fio->FgetUint8();
2243                                                 data_r = pair16.s16;
2244                                                 left_buffer[i] = (data_l + data_r) / 2;
2245                                         }
2246                                 }
2247                                 break;
2248                         case 32:
2249                                 if(__channels.s16 == 1) {
2250                                         for(int i = 0; i < samples; i++) {
2251                                                 pair32.b.l = fio->FgetUint8();
2252                                                 pair32.b.h = fio->FgetUint8();
2253                                                 pair32.b.h2 = fio->FgetUint8();
2254                                                 pair32.b.h3 = fio->FgetUint8();
2255                                                 data_l = (int16_t)(pair32.s32 / 65536);
2256                                                 
2257                                                 left_buffer[i] = data_l;
2258                                         }
2259                                 } else if(__channels.s16 == 2) {
2260                                         for(int i = 0; i < samples; i++) {
2261                                                 pair32.b.l = fio->FgetUint8();
2262                                                 pair32.b.h = fio->FgetUint8();
2263                                                 pair32.b.h2 = fio->FgetUint8();
2264                                                 pair32.b.h3 = fio->FgetUint8();
2265                                                 data32_l = pair32.s32 / 65536;
2266                                                 
2267                                                 pair32.b.l = fio->FgetUint8();
2268                                                 pair32.b.h = fio->FgetUint8();
2269                                                 pair32.b.h2 = fio->FgetUint8();
2270                                                 pair32.b.h3 = fio->FgetUint8();
2271                                                 data32_r = pair32.s32 / 65536;
2272                                                 
2273                                                 left_buffer[i] = (int16_t)((data32_l + data32_r) / 2);
2274                                         }
2275                                 }
2276                                 break;
2277                         default:
2278                                 break;
2279                         }
2280                 }
2281         } else {
2282                 return false;
2283         }
2284         *buffer = left_buffer;
2285         *rate = sample_rate;
2286         *got_samples = (int)samples;
2287         return true;
2288 }
2289
2290 DLL_PREFIX const _TCHAR *get_lib_common_version()
2291 {
2292 #if defined(__LIBEMU_UTIL_VERSION)
2293         return (const _TCHAR *)__LIBEMU_UTIL_VERSION;
2294 #else
2295         return (const _TCHAR *)"\0";
2296 #endif
2297 }