--- /dev/null
+/*
+ * Renderer using OPENCL/GL
+ * (C) 2012 K.Ohta <whatisthis.sowhat@gmail.com>
+ * History:
+ * Nov 01,2012: Initial
+ */
+
+
+#include <agar/core.h>
+#include <agar/core/types.h>
+#include <agar/gui.h>
+
+
+#include "api_draw.h"
+#include "api_kbd.h"
+
+#include "agar_xm7.h"
+#include "agar_draw.h"
+#include "agar_gldraw.h"
+#include "agar_glutil.h"
+#include "agar_logger.h"
+#include "xm7.h"
+#include "display.h"
+#include "subctrl.h"
+#include "device.h"
+#include "multipag.h"
+#include "ttlpalet.h"
+#include "apalet.h"
+
+#include "agar_glcl.h"
+
+/* Size in bytes of the scratch buffer used to fetch OpenCL build logs.
+ * Parenthesised so the macro expands safely inside larger expressions. */
+#define LOGSIZE (1024*1024)
+
+extern "C"{
+extern Uint8 *vram_pb;
+extern Uint8 *vram_pr;
+extern Uint8 *vram_pg;
+}
+
+extern PFNGLBINDBUFFERPROC glBindBuffer;
+extern float fBrightR;
+extern float fBrightG;
+extern float fBrightB;
+
+
+
+/*
+ * Construct an idle renderer: no kernels, no mapped transfer buffer,
+ * no enumerated platforms/devices.
+ *
+ * Every member that is read by a getter or used as an OpenCL wait-list
+ * entry before InitContext()/SetupBuffer() have run is given a defined
+ * value here, so those paths never see indeterminate data.
+ */
+GLCLDraw::GLCLDraw()
+{
+    int i;
+
+    pixelBuffer = NULL;
+    TransferBuffer = NULL;
+    nkernels = 0;
+    using_device = 0;
+
+    // Read by GetDevices()/GetPlatforms()/GetDeviceType()/GetDeviceName().
+    ret_num_devices = 0;
+    ret_num_platforms = 0;
+    // Compared against the incoming line number in AddPalette().
+    lastline = 0;
+
+    for(i = 0; i < 8; i++) {
+        platform_id[i] = NULL;
+        device_id[i] = NULL;
+        device_type[i] = 0;
+        local_memsize[i] = 0;
+    }
+    for(i = 0; i < (int)(sizeof(kernels_array) / sizeof(kernels_array[0])); i++) {
+        kernels_array[i] = NULL;
+    }
+
+    // Event handles are passed in wait lists; a NULL handle is rejected by
+    // the runtime, whereas an indeterminate one is undefined behaviour.
+    event_exec = NULL;
+    event_copytotexture = NULL;
+    event_release = NULL;
+    for(i = 0; i < 4; i++) event_uploadvram[i] = NULL;
+
+    AG_MutexInit(&mutex_buffer);
+    AG_MutexInit(&mutex_palette);
+}
+
+/*
+ * Release every OpenCL object owned by this renderer, the context
+ * property list, the host pixel buffer and both mutexes.
+ *
+ * Release status codes are intentionally ignored: nothing useful can be
+ * done about a failed release during destruction. Memory objects are
+ * released before the queue and context that they were created from.
+ */
+GLCLDraw::~GLCLDraw()
+{
+    cl_uint k;
+    int i;
+
+    for(k = 0; k < nkernels; k++) {
+        if(kernels_array[k] != NULL) clReleaseKernel(kernels_array[k]);
+    }
+    if(program != NULL) clReleaseProgram(program);
+
+    for(i = 0; i < 2; i++) {
+        if(inbuf[i] != NULL) clReleaseMemObject(inbuf[i]);
+        if(palette_buf[i] != NULL) clReleaseMemObject(palette_buf[i]);
+    }
+    if(outbuf != NULL) clReleaseMemObject(outbuf);
+    if(table != NULL) clReleaseMemObject(table);
+
+    if(command_queue != NULL) clReleaseCommandQueue(command_queue);
+    if(context != NULL) clReleaseContext(context);
+
+    if(properties != NULL) free(properties);
+    if(pixelBuffer != NULL) free(pixelBuffer);
+
+    AG_MutexDestroy(&mutex_buffer);
+    AG_MutexDestroy(&mutex_palette);
+}
+
+/*
+ * Context error callback handed to clCreateContext(): forwards the
+ * runtime's error text to the XM7 log at WARN level.
+ *
+ * errinfo      : message supplied by the OpenCL runtime (may be NULL).
+ * private_info : implementation-defined binary data (unused).
+ * cb           : size of private_info in bytes (unused).
+ * user_data    : pointer registered with clCreateContext() (unused).
+ */
+static void CL_CALLBACK cl_notify_log(const char *errinfo, const void *private_info, size_t cb, void *user_data)
+{
+    (void)private_info;
+    (void)cb;
+    (void)user_data;
+
+    // Never hand a NULL pointer to a %s conversion.
+    XM7_DebugLog(XM7_LOG_WARN, "CL Notify: %s", (errinfo != NULL) ? errinfo : "(null)");
+}
+
+/*
+ * Report whether CL/GL sharing is in use.
+ * Returns -1 when bCLEnableKhrGLShare is set, 0 otherwise.
+ */
+int GLCLDraw::GetGLEnabled(void)
+{
+    return (bCLEnableKhrGLShare != FALSE) ? -1 : 0;
+}
+
+/*
+ * Map the 640x400 output buffer for reading on the host.
+ *
+ * The blocking map waits on event_exec and records its own completion
+ * in event_release, which ReleasePixelBuffer() later waits on.
+ * Returns the mapped pointer, or NULL when the map reports an error.
+ */
+Uint32 *GLCLDraw::GetPixelBuffer(void)
+{
+    int status = 0;
+    const size_t nbytes = (size_t)(640 * 400 * sizeof(Uint32));
+    void *mapped;
+
+    mapped = clEnqueueMapBuffer(command_queue, outbuf, CL_TRUE, CL_MAP_READ,
+                                0, nbytes,
+                                1, &event_exec, &event_release, &status);
+    if(status < 0) {
+        return NULL;
+    }
+    clFlush(command_queue);
+    return (Uint32 *)mapped;
+}
+
+/*
+ * Unmap a pointer previously returned by GetPixelBuffer().
+ *
+ * p : mapped pointer; NULL is accepted and treated as "nothing to do".
+ *
+ * The unmap waits on event_release (produced by the map) and the queue
+ * is drained before returning.
+ * Returns 0 for a NULL pointer, otherwise the status of the unmap call.
+ */
+int GLCLDraw::ReleasePixelBuffer(Uint32 *p)
+{
+    cl_int ret;
+
+    if(p == NULL) return 0;
+    // Assign (not OR into) the status: there is no earlier value to merge.
+    ret = clEnqueueUnmapMemObject(command_queue, outbuf,
+                                  p, 1, &event_release, NULL);
+    clFinish(command_queue);
+    return ret;
+}
+
+/* Index (into device_id[]) of the device selected by InitContext(). */
+int GLCLDraw::GetUsingDeviceNo(void)
+{
+    const int selected = using_device;
+    return selected;
+}
+
+/* Number of devices recorded in ret_num_devices. */
+int GLCLDraw::GetDevices(void)
+{
+    const int count = ret_num_devices;
+    return count;
+}
+
+/* Number of platforms recorded in ret_num_platforms. */
+int GLCLDraw::GetPlatforms(void)
+{
+    const int count = ret_num_platforms;
+    return count;
+}
+
+/*
+ * Write a short textual name for the type of device #num into str.
+ *
+ * str    : destination buffer (ignored when NULL).
+ * maxlen : size of str in bytes (ignored when < 1).
+ * num    : device index; out-of-range indexes yield an empty string.
+ *
+ * The result is always NUL-terminated and truncated to maxlen - 1
+ * characters. Only exact matches of a single device-type value are
+ * named; anything else is reported as "Unknown".
+ */
+void GLCLDraw::GetDeviceType(char *str, int maxlen, int num)
+{
+    const char *name;
+
+    if((str == NULL) || (maxlen < 1)) return;
+    str[0] = '\0';
+    if((num < 0) || (num >= 8) || (num >= ret_num_devices)) return;
+
+    switch(device_type[num]) {
+    case CL_DEVICE_TYPE_CPU:
+        name = "CPU";
+        break;
+    case CL_DEVICE_TYPE_GPU:
+        name = "GPU";
+        break;
+    case CL_DEVICE_TYPE_ACCELERATOR:
+        name = "ACCELERATOR";
+        break;
+    case CL_DEVICE_TYPE_DEFAULT:
+        name = "DEFAULT";
+        break;
+    default:
+        name = "Unknown";
+        break;
+    }
+    strncpy(str, name, (size_t)(maxlen - 1));
+    // strncpy() does not terminate when the source fills the whole count.
+    str[maxlen - 1] = '\0';
+}
+
+/*
+ * Query CL_DEVICE_NAME of device #num into str.
+ *
+ * str    : destination buffer (ignored when NULL).
+ * maxlen : size of str in bytes (ignored when < 1).
+ * num    : device index; out-of-range indexes yield an empty string.
+ *
+ * On any query failure (including a name that does not fit in
+ * maxlen - 1 bytes) str is left as an empty string. The result is
+ * always NUL-terminated.
+ */
+void GLCLDraw::GetDeviceName(char *str, int maxlen, int num)
+{
+    size_t llen = 0;
+    cl_int r;
+
+    if((str == NULL) || (maxlen < 1)) return;
+    str[0] = '\0';
+    // device_id[] holds 8 entries, so index 8 is already out of range.
+    if((num < 0) || (num >= 8) || (num >= ret_num_devices)) return;
+
+    r = clGetDeviceInfo(device_id[num], CL_DEVICE_NAME,
+                        (size_t)(maxlen - 1), str, &llen);
+    if(r != CL_SUCCESS) {
+        // llen is not trustworthy here; do not index with it.
+        str[0] = '\0';
+        return;
+    }
+    str[maxlen - 1] = '\0';
+}
+
+/*
+ * Enumerate platforms/devices, pick one device and create the OpenCL
+ * context and command queue for it.
+ *
+ * platformnum  : requested platform index (clamped to the valid range).
+ * processornum : requested device index (clamped to the valid range).
+ * GLinterop    : non-zero to request CL/GL sharing; it is only enabled
+ *                when the chosen device advertises "cl_khr_gl_sharing".
+ *
+ * Returns CL_SUCCESS on success, otherwise an OpenCL error code
+ * (CL_INVALID_PLATFORM when no platform exists, CL_DEVICE_NOT_FOUND when
+ * the platform has no device, CL_OUT_OF_HOST_MEMORY when the property
+ * list cannot be allocated, or whatever the failing API call returned).
+ */
+cl_int GLCLDraw::InitContext(int platformnum, int processornum, int GLinterop)
+{
+    cl_int ret;
+    cl_uint nplatforms = 0;
+    cl_uint ndevices = 0;
+    char infobuf[1024];
+    int i;
+
+    // Allocate the property list once; a second call reuses it.
+    if(properties == NULL) {
+        properties = (cl_context_properties *)malloc(16 * sizeof(cl_context_properties));
+        if(properties == NULL) return CL_OUT_OF_HOST_MEMORY;
+    }
+
+    ret = clGetPlatformIDs(8, platform_id, &nplatforms);
+    if(ret != CL_SUCCESS) return ret;
+    // The runtime reports how many platforms exist, which may exceed the
+    // 8 slots that were actually filled in.
+    if(nplatforms > 8) nplatforms = 8;
+    ret_num_platforms = (cl_int)nplatforms;
+    if(ret_num_platforms <= 0) return CL_INVALID_PLATFORM;
+
+    platform_num = platformnum;
+    if(platform_num >= ret_num_platforms) platform_num = ret_num_platforms - 1;
+    if(platform_num <= 0) platform_num = 0;
+
+    ret = clGetDeviceIDs(platform_id[platform_num], CL_DEVICE_TYPE_ALL, 8, device_id,
+                         &ndevices);
+    if(ndevices > 8) ndevices = 8;
+    ret_num_devices = (cl_int)ndevices;
+    if(ret != CL_SUCCESS) return ret;
+    if(ret_num_devices <= 0) {
+        XM7_DebugLog(XM7_LOG_DEBUG, "CL : Has no useful device(s).");
+        // Must not report success: no context is created on this path.
+        return CL_DEVICE_NOT_FOUND;
+    }
+    XM7_DebugLog(XM7_LOG_DEBUG, "CL : Found %d processors.", ret_num_devices);
+
+    using_device = processornum;
+    if(using_device >= ret_num_devices) using_device = ret_num_devices - 1;
+    if(using_device <= 0) using_device = 0;
+
+    bCLEnableKhrGLShare = 0;
+
+    for(i = 0; i < ret_num_devices; i++) {
+        size_t llen = 0;
+        size_t extension_len = 0;
+        char *extension_data = NULL;
+
+        infobuf[0] = '\0';
+        GetDeviceName(infobuf, sizeof(infobuf), i);
+        XM7_DebugLog(XM7_LOG_DEBUG, "CL : Processor #%d : Name = %s ", i, infobuf);
+
+        clGetDeviceInfo(device_id[i], CL_DEVICE_TYPE,
+                        sizeof(cl_device_type), &(device_type[i]), &llen);
+        clGetDeviceInfo(device_id[i], CL_DEVICE_LOCAL_MEM_SIZE,
+                        sizeof(cl_ulong), &(local_memsize[i]), &llen);
+        infobuf[0] = '\0';
+        GetDeviceType(infobuf, sizeof(infobuf), i);
+        // %d expects an int; local_memsize[] is a 64-bit value.
+        XM7_DebugLog(XM7_LOG_DEBUG, "CL : Processor #%d : TYPE = %s / Local memory size = %d bytes", i, infobuf, (int)local_memsize[i]);
+
+        // Size the extension string dynamically: a fixed 1024-byte buffer
+        // makes the query fail on devices with long extension lists, which
+        // would silently disable GL sharing.
+        if((clGetDeviceInfo(device_id[i], CL_DEVICE_EXTENSIONS, 0, NULL, &extension_len) == CL_SUCCESS)
+           && (extension_len > 0)) {
+            extension_data = (char *)malloc(extension_len + 1);
+        }
+        if(extension_data != NULL) {
+            extension_data[0] = '\0';
+            if(clGetDeviceInfo(device_id[i], CL_DEVICE_EXTENSIONS,
+                               extension_len, extension_data, NULL) != CL_SUCCESS) {
+                extension_data[0] = '\0';
+            }
+            extension_data[extension_len] = '\0';
+        }
+        if(i == using_device) {
+            if((extension_data != NULL) && (strcasestr(extension_data, "cl_khr_gl_sharing") != NULL)) {
+                if(GLinterop != 0) bCLEnableKhrGLShare = -1;
+            } else {
+                bCLEnableKhrGLShare = 0;
+            }
+        }
+        // Keep the logged text within the 1023 characters the log call
+        // received before this change.
+        if((extension_data != NULL) && (extension_len > 1023)) extension_data[1023] = '\0';
+        XM7_DebugLog(XM7_LOG_DEBUG, "CL : Extension features(#%d):%s", i,
+                     (extension_data != NULL) ? extension_data : "");
+        free(extension_data);
+    }
+
+    XM7_DebugLog(XM7_LOG_DEBUG, "CL : Using device #%d", using_device);
+    if(bCLEnableKhrGLShare != 0) { // This is only under X11. Must fix.
+        XM7_DebugLog(XM7_LOG_DEBUG, "CL : GL Interoperability enabled.");
+        properties[0] = CL_GL_CONTEXT_KHR;
+        properties[1] = (cl_context_properties)glXGetCurrentContext();
+        // %08x expects an unsigned int, not a pointer.
+        XM7_DebugLog(XM7_LOG_DEBUG, "CL : GL Context = %08x", (unsigned int)(uintptr_t)glXGetCurrentContext());
+        properties[2] = CL_GLX_DISPLAY_KHR;
+        properties[3] = (cl_context_properties)glXGetCurrentDisplay();
+        XM7_DebugLog(XM7_LOG_DEBUG, "CL : GL Display = %08x", (unsigned int)(uintptr_t)glXGetCurrentDisplay());
+        properties[4] = CL_CONTEXT_PLATFORM;
+        properties[5] = (cl_context_properties)platform_id[platform_num];
+        properties[6] = 0;
+    } else {
+        XM7_DebugLog(XM7_LOG_DEBUG, "CL : GL Interoperability disabled.");
+        properties[0] = CL_CONTEXT_PLATFORM;
+        properties[1] = (cl_context_properties)platform_id[platform_num];
+        properties[2] = 0;
+    }
+
+    context = clCreateContext(properties, 1, &device_id[using_device], cl_notify_log, NULL, &ret);
+    XM7_DebugLog(XM7_LOG_DEBUG, "CL : Created context : STS = %d", ret);
+    if(ret != CL_SUCCESS) return ret;
+
+    command_queue = clCreateCommandQueue(context, device_id[using_device],
+                                         CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &ret);
+    if(ret == CL_SUCCESS) XM7_DebugLog(XM7_LOG_DEBUG, "CL: Created command queue.");
+    return ret;
+}
+
+/*
+ * Dump the OpenCL build log of every enumerated device to the XM7 log.
+ *
+ * program  : program object whose build log is queried.
+ * userdata : the owning GLCLDraw instance; nothing is done when NULL.
+ *
+ * Devices for which the query fails are skipped.
+ */
+static void CL_CALLBACK CL_LogProgramExecute(cl_program program, void *userdata)
+{
+    class GLCLDraw *t = (class GLCLDraw *)userdata;
+    char *logBuf;
+    size_t length;
+    cl_int r;
+    cl_int n;
+    cl_int num;
+    cl_device_id *devid;
+
+    // Validate before allocating so there is nothing to leak on early exit.
+    if(t == NULL) return;
+    logBuf = (char *)malloc((LOGSIZE) * sizeof(char));
+    if(logBuf == NULL) return;
+
+    num = t->ret_num_devices;
+    devid = t->device_id;
+    for(n = 0; n < num; n++) {
+        length = 0;
+        logBuf[0] = '\0';
+        r = clGetProgramBuildInfo(program, devid[n], CL_PROGRAM_BUILD_LOG,
+                                  (LOGSIZE) - 1, (void *)logBuf, &length);
+        // On failure 'length' carries no meaning; do not index with it.
+        if(r != CL_SUCCESS) continue;
+        if((length > 0) && (length < (size_t)(LOGSIZE))) {
+            logBuf[length] = '\0';
+            if(strlen(logBuf) > 0) XM7_DebugLog(XM7_LOG_INFO, "CL :Build Log of Device #%d:%s", n, logBuf);
+        }
+    }
+    free(logBuf);
+}
+
+
+/*
+ * Compile the OpenCL program from source text and look up the kernels
+ * used by the renderer.
+ *
+ * p : NUL-terminated OpenCL C source; NULL yields CL_INVALID_VALUE.
+ *
+ * The program is built for the selected device with
+ * "-cl-fast-relaxed-math" and a _CL_KERNEL_LITTLE_ENDIAN define that
+ * follows the device's reported endianness (little endian is assumed
+ * when the query fails). Kernels whose function name starts with
+ * getvram8 / getvram4096 / getvram256k / CreateTable / CopyVram are
+ * bound to the matching kernel_* member unless it is already set.
+ *
+ * Returns the status of the last OpenCL call that was made.
+ */
+cl_int GLCLDraw::BuildFromSource(const char *p)
+{
+    cl_int ret;
+    size_t codeSize;
+    char compile_options[2048];
+    const char *endian_opt = "-D_CL_KERNEL_LITTLE_ENDIAN=1 "; // Assume little endian
+    cl_bool endian_little;
+
+    if(p == NULL) return CL_INVALID_VALUE;
+    compile_options[0] = '\0';
+
+    codeSize = strlen(p);
+    program = clCreateProgramWithSource(context, 1, (const char **)&p,
+                                        (const size_t *)&codeSize, &ret);
+    XM7_DebugLog(XM7_LOG_INFO, "CL: Build Result=%d", ret);
+    if(ret < CL_SUCCESS) {
+        return ret;
+    }
+
+    // Compile from source
+    if(clGetDeviceInfo(device_id[using_device], CL_DEVICE_ENDIAN_LITTLE,
+                       sizeof(cl_bool), &endian_little, NULL) == CL_SUCCESS) {
+        if(endian_little != CL_TRUE) endian_opt = "-D_CL_KERNEL_LITTLE_ENDIAN=0 "; // Big endian
+    }
+    // strncat()'s bound is the number of characters that may still be
+    // appended, not the size of the destination buffer.
+    strncat(compile_options, "-cl-fast-relaxed-math ",
+            sizeof(compile_options) - strlen(compile_options) - 1);
+    strncat(compile_options, endian_opt,
+            sizeof(compile_options) - strlen(compile_options) - 1);
+
+    build_callback = CL_LogProgramExecute;
+    // Build without a completion callback so the call only returns once
+    // the build has finished; with a callback the runtime is allowed to
+    // return early, and the kernels are created right below.
+    ret = clBuildProgram(program, 1, &device_id[using_device], compile_options,
+                         NULL, NULL);
+    CL_LogProgramExecute(program, (void *)this);
+    XM7_DebugLog(XM7_LOG_INFO, "Compile Result=%d", ret);
+    if(ret != CL_SUCCESS) { // Printout error log.
+        return ret;
+    }
+
+    ret = clCreateKernelsInProgram(program, sizeof(kernels_array) / sizeof(cl_kernel),
+                                   kernels_array, &nkernels);
+    if(ret < CL_SUCCESS) {
+        XM7_DebugLog(XM7_LOG_INFO, "Unable to build CL kernel. Status=%d", ret);
+    } else {
+        char funcname[128];
+        cl_uint i;
+
+        XM7_DebugLog(XM7_LOG_INFO, "Built %d CL kernel(s).", nkernels);
+        for(i = 0; i < nkernels; i++) {
+            memset(funcname, 0x00, sizeof(funcname));
+            // The last argument is an optional size_t* out-parameter.
+            if(clGetKernelInfo(kernels_array[i], CL_KERNEL_FUNCTION_NAME,
+                               sizeof(funcname) / sizeof(char) - 1,
+                               funcname, NULL) == CL_SUCCESS) {
+                if((strncmp(funcname, "getvram8", strlen("getvram8")) == 0) && (kernel_8colors == NULL)) kernel_8colors = &kernels_array[i];
+                if((strncmp(funcname, "getvram4096", strlen("getvram4096")) == 0) && (kernel_4096colors == NULL)) kernel_4096colors = &kernels_array[i];
+                if((strncmp(funcname, "getvram256k", strlen("getvram256k")) == 0) && (kernel_256kcolors == NULL)) kernel_256kcolors = &kernels_array[i];
+                if((strncmp(funcname, "CreateTable", strlen("CreateTable")) == 0) && (kernel_table == NULL)) kernel_table = &kernels_array[i];
+                if((strncmp(funcname, "CopyVram", strlen("CopyVram")) == 0) && (kernel_copyvram == NULL)) kernel_copyvram = &kernels_array[i];
+            }
+        }
+    }
+    return ret;
+}
+
+/*
+ * Lock mutex_buffer and return the current transfer buffer pointer.
+ *
+ * timeout : 0 blocks until the lock is obtained. Otherwise the lock is
+ *           polled once per 10-unit AG_Delay() slice for timeout / 10
+ *           slices, followed by one final attempt after waiting the
+ *           remaining timeout % 10.
+ *
+ * Returns TransferBuffer with the mutex held, or NULL (mutex not held)
+ * when every attempt failed. Pair a non-NULL-path call with
+ * ReleaseBufPtr().
+ */
+Uint8 *GLCLDraw::GetBufPtr(Uint32 timeout)
+{
+    const Uint32 slices = timeout / 10;
+    Uint32 attempt;
+
+    if(timeout == 0) {
+        AG_MutexLock(&mutex_buffer);
+        return TransferBuffer;
+    }
+
+    for(attempt = 0; attempt < slices; attempt++) {
+        if(AG_MutexTryLock(&mutex_buffer) == 0) {
+            return TransferBuffer;
+        }
+        AG_Delay(10);
+    }
+
+    // Last chance after sleeping for the remainder of the timeout.
+    AG_Delay(timeout % 10);
+    if(AG_MutexTryLock(&mutex_buffer) == 0) {
+        return TransferBuffer;
+    }
+    return NULL;
+}
+
+/* Unlock mutex_buffer; counterpart of a successful GetBufPtr(). */
+void GLCLDraw::ReleaseBufPtr(void)
+{
+    AG_Mutex *const lock = &mutex_buffer;
+    AG_MutexUnlock(lock);
+}
+
+/*
+ * Map the current input bank (inbuf[inbuf_bank]) for host writes.
+ *
+ * bmode : screen mode; selects how many bytes are mapped
+ *         (SCR_200LINE 0x4000*3, SCR_400LINE 0x8000*3,
+ *          SCR_4096 0x2000*12, SCR_262144 0x2000*18).
+ *
+ * The map is blocking: callers store and use the returned pointer
+ * immediately, which is only valid once the map command has completed.
+ * Returns the mapped pointer, or NULL for an unknown mode or on error.
+ */
+Uint8 *GLCLDraw::MapTransferBuffer(int bmode)
+{
+    size_t nbytes;
+    cl_int ret = CL_SUCCESS;
+    void *p;
+
+    switch(bmode)
+    {
+    case SCR_200LINE:
+        nbytes = 0x4000 * 3;
+        break;
+    case SCR_400LINE:
+        nbytes = 0x8000 * 3;
+        break;
+    case SCR_4096:
+        nbytes = 0x2000 * 12;
+        break;
+    case SCR_262144:
+        nbytes = 0x2000 * 18;
+        break;
+    default:
+        // Nothing is mapped for an unknown mode.
+        return NULL;
+    }
+    // No completion event is requested: nothing consumes it, and an
+    // unreleased event would leak on every call.
+    p = clEnqueueMapBuffer(command_queue, inbuf[inbuf_bank], CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION,
+                           0, nbytes,
+                           0, NULL, NULL, &ret);
+    if(ret < CL_SUCCESS) return NULL;
+    return (Uint8 *)p;
+}
+
+/*
+ * Enqueue an unmap of p from the current input bank.
+ *
+ * p : pointer obtained from MapTransferBuffer(); NULL is rejected with
+ *     CL_INVALID_MEM_OBJECT.
+ *
+ * The completion event is stored in event_uploadvram[1].
+ * Returns the status of clEnqueueUnmapMemObject().
+ */
+cl_int GLCLDraw::UnMapTransferBuffer(Uint8 *p)
+{
+    if(p == NULL) {
+        return CL_INVALID_MEM_OBJECT;
+    }
+    return clEnqueueUnmapMemObject(command_queue, inbuf[inbuf_bank],
+                                   p, 0, NULL, &event_uploadvram[1]);
+}
+
+/*
+ * Record the current palette for a raster line in the mapped palette
+ * buffer.
+ *
+ * line   : raster line (clamped to 0..199, or 0..399 for the digital
+ *          palette in SCR_400LINE mode).
+ * mpage  : multi-page value stored with the entry.
+ * analog : TRUE to snapshot the 4096-entry analog palette
+ *          (apalet_r/g/b), FALSE to snapshot the digital palette
+ *          (ttl_palet).
+ *
+ * A new entry is appended when the line differs from the previously
+ * recorded one (or when only the initial entry exists); otherwise the
+ * most recent entry is overwritten. Nothing is recorded once the table
+ * is full or when no palette buffer is mapped.
+ */
+void GLCLDraw::AddPalette(int line, Uint8 mpage, BOOL analog)
+{
+    struct palettebuf_t *p;
+    Uint32 lines;
+    int i;
+
+    AG_MutexLock(&mutex_palette);
+    // Read the pointer under the lock: CopyPalette() swaps palettebuf and
+    // unmaps the old buffer while holding this mutex.
+    p = palettebuf;
+    if(p == NULL) {
+        AG_MutexUnlock(&mutex_palette);
+        return;
+    }
+
+    if(analog) {
+        const Uint32 maxent = (Uint32)(sizeof(p->atbls) / sizeof(p->atbls[0]));
+
+        if(line < 0) line = 0;
+        if(line > 199) line = 199;
+        lines = p->alines_h * 256 + p->alines_l;
+        if((lastline != line) || (lines == 1)) {
+            lastline = line;
+            lines++;
+            if(lines > 199) {
+                AG_MutexUnlock(&mutex_palette);
+                return;
+            }
+        }
+        // Guard the [lines - 1] index against an empty or corrupt count.
+        if(lines < 1) lines = 1;
+        if(lines > maxent) {
+            AG_MutexUnlock(&mutex_palette);
+            return;
+        }
+        p->alines_h = lines / 256;
+        p->alines_l = lines % 256;
+        p->atbls[lines - 1].line_h = line / 256;
+        p->atbls[lines - 1].line_l = line % 256;
+        p->atbls[lines - 1].mpage = mpage;
+        for(i = 0; i < 4096; i++) {
+            p->atbls[lines - 1].r_4096[i] = apalet_r[i];
+            p->atbls[lines - 1].g_4096[i] = apalet_g[i];
+            p->atbls[lines - 1].b_4096[i] = apalet_b[i];
+        }
+    } else {
+        const Uint32 maxent = (Uint32)(sizeof(p->dtbls) / sizeof(p->dtbls[0]));
+        int h = 199;
+
+        if(bMode == SCR_400LINE) h = 399;
+        if(line < 0) line = 0;
+        if(line > h) line = h;
+        lines = p->dlines_h * 256 + p->dlines_l;
+        if((lastline != line) || (lines == 1)) {
+            lines++;
+            lastline = line;
+            if(lines > (Uint32)h) {
+                AG_MutexUnlock(&mutex_palette);
+                return;
+            }
+        }
+        // Guard the [lines - 1] index against an empty or corrupt count.
+        if(lines < 1) lines = 1;
+        if(lines > maxent) {
+            AG_MutexUnlock(&mutex_palette);
+            return;
+        }
+        p->dlines_h = lines / 256;
+        p->dlines_l = lines % 256;
+        p->dtbls[lines - 1].line_h = line / 256;
+        p->dtbls[lines - 1].line_l = line % 256;
+        p->dtbls[lines - 1].mpage = mpage;
+        // tbl[] and ttl_palet[] both hold 8 entries; copy all of them,
+        // as ResetPalette() does.
+        for(i = 0; i < 8; i++) {
+            p->dtbls[lines - 1].tbl[i] = ttl_palet[i];
+        }
+    }
+    AG_MutexUnlock(&mutex_palette);
+}
+
+/*
+ * Reset the palette history to a single entry for line 0.
+ *
+ * Under mutex_palette: lastline is cleared and, when a palette buffer
+ * is mapped, both the analog and the digital table are reduced to one
+ * entry holding the current multi_page value and the current
+ * apalet_r/g/b and ttl_palet contents.
+ */
+void GLCLDraw::ResetPalette(void)
+{
+    struct palettebuf_t *pb;
+    int idx;
+
+    AG_MutexLock(&mutex_palette);
+    pb = palettebuf;
+    lastline = 0;
+    if(pb != NULL) {
+        struct apalettetbl_t *a0 = &(pb->atbls[0]);
+        struct dpalettetbl_t *d0 = &(pb->dtbls[0]);
+
+        palettebuf = pb;
+
+        // Analog table: one entry, line 0.
+        pb->alines_h = 0;
+        pb->alines_l = 1;
+        a0->line_h = 0;
+        a0->line_l = 0;
+        a0->mpage = multi_page;
+        for(idx = 0; idx < 4096; idx++) {
+            a0->r_4096[idx] = apalet_r[idx];
+            a0->g_4096[idx] = apalet_g[idx];
+            a0->b_4096[idx] = apalet_b[idx];
+        }
+
+        // Digital table: one entry, line 0.
+        pb->dlines_h = 0;
+        pb->dlines_l = 1;
+        d0->line_h = 0;
+        d0->line_l = 0;
+        d0->mpage = multi_page;
+        for(idx = 0; idx < 8; idx++) {
+            d0->tbl[idx] = ttl_palet[idx];
+        }
+    }
+    AG_MutexUnlock(&mutex_palette);
+}
+
+/*
+ * Switch the host-side palette buffer to the other bank.
+ *
+ * Under mutex_palette: the other palette bank is mapped, the recorded
+ * header and palette entries are copied from the current buffer into
+ * it, palettebuf is pointed at the new mapping and the old bank is
+ * unmapped. palette_bank_old then names the bank that was just
+ * unmapped and palette_bank the one now mapped.
+ *
+ * Nothing happens when no palette buffer is mapped or the map fails.
+ */
+void GLCLDraw::CopyPalette(void)
+{
+    struct palettebuf_t *pold, *pnew;
+    int newline;
+    int alines;
+    int dlines;
+    cl_int r = CL_SUCCESS;
+
+    AG_MutexLock(&mutex_palette);
+
+    pold = palettebuf;
+    // Check before the first dereference below.
+    if(pold == NULL) {
+        AG_MutexUnlock(&mutex_palette);
+        return;
+    }
+    newline = palette_bank + 1;
+    if(newline >= 2) newline = 0;
+
+    // Blocking map, so no completion event is needed (or leaked).
+    pnew = (struct palettebuf_t *)clEnqueueMapBuffer(command_queue, palette_buf[newline], CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
+                                                     0, (size_t)sizeof(struct palettebuf_t),
+                                                     0, NULL, NULL, &r);
+    if((r < CL_SUCCESS) || (pnew == NULL)) {
+        AG_MutexUnlock(&mutex_palette);
+        return;
+    }
+    alines = pold->alines_h * 256 + pold->alines_l;
+    dlines = pold->dlines_h * 256 + pold->dlines_l;
+    if(alines < 0) alines = 0;
+    if(alines > 199) alines = 199;
+    if(dlines < 0) dlines = 0;
+    if(dlines > 399) dlines = 399;
+
+    memcpy(pnew, pold, sizeof(Uint8) * 4 + sizeof(struct apalettetbl_t) * alines); // Copy Lines + Analog Palette
+    memcpy(&(pnew->dtbls[0]), &(pold->dtbls[0]), sizeof(struct dpalettetbl_t) * dlines); // Copy Digital Palette
+    palettebuf = pnew;
+
+    clEnqueueUnmapMemObject(command_queue, palette_buf[palette_bank],
+                            pold, 0, NULL, NULL);
+    palette_bank_old = palette_bank;
+    palette_bank = newline;
+
+    AG_MutexUnlock(&mutex_palette);
+    clFlush(command_queue);
+}
+
+
+
+
+
+/*
+ * Run the conversion kernel for the given screen mode, writing pixels
+ * into outbuf.
+ *
+ * bmode : screen mode (SCR_200LINE, SCR_400LINE, SCR_4096, SCR_262144).
+ *
+ * Step 1 (under mutex_buffer): when any bDrawLine[] flag is set, the
+ * host-written input bank is unmapped, the banks are swapped, the old
+ * bank is copied into the new one and the new one is mapped again as
+ * TransferBuffer. All 400 bDrawLine[] flags are cleared.
+ * Step 2: when lines were dirty or a palette-change flag is set, the
+ * palette bank is switched via CopyPalette(), kernel arguments are set
+ * for the mode and the kernel is enqueued (bracketed by GL object
+ * acquire/release when CL/GL sharing is enabled).
+ *
+ * Returns -1 when outbuf does not exist,
+ * CL_MEM_OBJECT_ALLOCATION_FAILURE when no transfer buffer is mapped,
+ * otherwise the accumulated / last OpenCL status.
+ */
+cl_int GLCLDraw::GetVram(int bmode)
+{
+    cl_int ret = 0;
+    cl_kernel *kernel = NULL;
+    int w = 0;
+    int h = 0;
+    size_t lws[] = {10}; // local jobs.
+    size_t gws[] = {(size_t)nCLGlobalWorkThreads}; // Parallel jobs.
+    size_t *goff = NULL;
+    int mpage = multi_page;
+    int crtflag = crt_flag;
+    int bank;
+    BOOL flag = FALSE;
+    int i;
+    cl_float4 bright;
+    cl_event copy_event = NULL;
+
+    bright.s[0] = fBrightR; // R
+    bright.s[1] = fBrightG; // G
+    bright.s[2] = fBrightB; // B
+    bright.s[3] = 1.0; // A
+
+    if(outbuf == NULL) return -1;
+    /*
+     * Swap Buffer
+     */
+    {
+        size_t transfer_size = 0;
+        Uint8 *p;
+
+        bank = inbuf_bank;
+        p = GetBufPtr(0); // timeout 0: blocks until mutex_buffer is held.
+        for(i = 0; i < 400 ; i++) {
+            if(bDrawLine[i]) flag = TRUE;
+            bDrawLine[i] = FALSE;
+        }
+        if(flag) {
+            ret = UnMapTransferBuffer(p);
+            if(ret < CL_SUCCESS) {
+                ReleaseBufPtr();
+                return ret;
+            }
+        }
+        switch(bmode){
+        case SCR_200LINE:
+            transfer_size = 0x4000 * 3;
+            break;
+        case SCR_400LINE:
+            transfer_size = 0x8000 * 3;
+            break;
+        case SCR_4096:
+            transfer_size = 0x2000 * 12;
+            break;
+        case SCR_262144:
+            transfer_size = 0x2000 * 18;
+            break;
+        }
+        if((flag != FALSE) && (transfer_size > 0)){
+            inbuf_bank++;
+            if(inbuf_bank >= 2) inbuf_bank = 0;
+            if(kernel_copyvram != NULL) {
+                size_t lws_copy[] = {1};
+                size_t gws_copy[] = {gws[0]};
+
+                cl_int size = transfer_size;
+                ret |= clSetKernelArg(*kernel_copyvram, 0, sizeof(cl_mem), (void *)&(inbuf[inbuf_bank]));
+                ret |= clSetKernelArg(*kernel_copyvram, 1, sizeof(cl_mem), (void *)&(inbuf[bank]));
+                ret |= clSetKernelArg(*kernel_copyvram, 2, sizeof(cl_int), &size);
+                ret |= clSetKernelArg(*kernel_copyvram, 3, sizeof(cl_int), &bCLSparse);
+                if(bCLSparse) {
+                    ret = clEnqueueNDRangeKernel(command_queue, *kernel_copyvram, 1,
+                                                 goff, gws_copy, lws_copy,
+                                                 0, NULL, &copy_event);
+                } else {
+                    ret = clEnqueueTask(command_queue,
+                                        *kernel_copyvram, 0, NULL, &copy_event);
+                }
+            } else {
+                ret = clEnqueueCopyBuffer(command_queue, inbuf[bank], inbuf[inbuf_bank], 0,
+                                          0, transfer_size, 0, NULL,
+                                          &copy_event);
+            }
+            clFinish(command_queue);
+            // The copy has completed; drop the event so it does not leak.
+            if((ret == CL_SUCCESS) && (copy_event != NULL)) clReleaseEvent(copy_event);
+            TransferBuffer = MapTransferBuffer(bmode);
+        }
+        ReleaseBufPtr();
+        if(TransferBuffer == NULL) return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+    }
+    if((flag) || bPaletFlag || SDLDrawFlag.APaletteChanged || SDLDrawFlag.DPaletteChanged) {
+        kernel = NULL;
+        LockVram();
+        SDLDrawFlag.APaletteChanged = FALSE;
+        SDLDrawFlag.DPaletteChanged = FALSE;
+        SDLDrawFlag.Drawn = FALSE;
+        bPaletFlag = FALSE;
+        UnlockVram();
+        CopyPalette();
+        switch(bmode) {
+        case SCR_400LINE:
+        case SCR_200LINE:
+            w = 640;
+            h = 200;
+            if(bmode == SCR_400LINE) h = 400;
+            if(kernel_8colors != NULL) kernel = kernel_8colors;
+            if(kernel != NULL) {
+                ret |= clSetKernelArg(*kernel, 0, sizeof(cl_mem), (void *)&(inbuf[bank]));
+                ret |= clSetKernelArg(*kernel, 1, sizeof(int), (void *)&w);
+                ret |= clSetKernelArg(*kernel, 2, sizeof(int), (void *)&h);
+                ret |= clSetKernelArg(*kernel, 3, sizeof(cl_mem), (void *)&outbuf);
+                ret |= clSetKernelArg(*kernel, 4, sizeof(cl_mem), (void *)&palette_buf[palette_bank_old]);
+                ret |= clSetKernelArg(*kernel, 5, sizeof(cl_mem), (void *)&table);
+                ret |= clSetKernelArg(*kernel, 6, sizeof(int), (void *)&bCLSparse);
+                ret |= clSetKernelArg(*kernel, 7, sizeof(int), (void *)&crtflag);
+                ret |= clSetKernelArg(*kernel, 8, sizeof(cl_float4), (void *)&bright);
+                clFlush(command_queue);
+            }
+            break;
+        case SCR_262144: // No window handling in this mode.
+            w = 320;
+            h = 200;
+            if(kernel_256kcolors != NULL) kernel = kernel_256kcolors;
+            if(kernel != NULL) {
+                // Note: this kernel takes the multi-page value (arg 5)
+                // instead of a palette buffer.
+                ret |= clSetKernelArg(*kernel, 0, sizeof(cl_mem), (void *)&(inbuf[bank]));
+                ret |= clSetKernelArg(*kernel, 1, sizeof(cl_int), (void *)&w);
+                ret |= clSetKernelArg(*kernel, 2, sizeof(cl_int), (void *)&h);
+                ret |= clSetKernelArg(*kernel, 3, sizeof(cl_mem), (void *)&outbuf);
+                ret |= clSetKernelArg(*kernel, 4, sizeof(cl_mem), (void *)&table);
+                ret |= clSetKernelArg(*kernel, 5, sizeof(cl_uint), (void *)&mpage);
+                ret |= clSetKernelArg(*kernel, 6, sizeof(cl_int), (void *)&bCLSparse);
+                ret |= clSetKernelArg(*kernel, 7, sizeof(cl_int), (void *)&crtflag);
+                ret |= clSetKernelArg(*kernel, 8, sizeof(cl_float4), (void *)&bright);
+                clFlush(command_queue);
+            }
+            break;
+        case SCR_4096:
+            w = 320;
+            h = 200;
+            if(kernel_4096colors != NULL) kernel = kernel_4096colors;
+            if(kernel != NULL) {
+                ret |= clSetKernelArg(*kernel, 0, sizeof(cl_mem), (void *)&(inbuf[bank]));
+                ret |= clSetKernelArg(*kernel, 1, sizeof(cl_int), (void *)&w);
+                ret |= clSetKernelArg(*kernel, 2, sizeof(cl_int), (void *)&h);
+                ret |= clSetKernelArg(*kernel, 3, sizeof(cl_mem), (void *)&outbuf);
+                ret |= clSetKernelArg(*kernel, 4, sizeof(cl_mem), (void *)&(palette_buf[palette_bank_old]));
+                ret |= clSetKernelArg(*kernel, 5, sizeof(cl_mem), (void *)&table);
+                ret |= clSetKernelArg(*kernel, 6, sizeof(cl_int), (void *)&bCLSparse);
+                ret |= clSetKernelArg(*kernel, 7, sizeof(cl_int), (void *)&crtflag);
+                ret |= clSetKernelArg(*kernel, 8, sizeof(cl_float4), (void *)&bright);
+                clFlush(command_queue);
+            }
+            break;
+        }
+        w2 = w;
+        h2 = h;
+        if(bCLEnableKhrGLShare != 0) {
+            glFlush();
+            ret |= clEnqueueAcquireGLObjects (command_queue,
+                                              1, (cl_mem *)&outbuf,
+                                              0, NULL, &event_copytotexture);
+            if(kernel != NULL) {
+                if(bCLSparse) {
+                    ret = clEnqueueNDRangeKernel(command_queue, *kernel, 1,
+                                                 goff, gws, lws,
+                                                 1, &event_copytotexture, &event_exec);
+                } else {
+                    ret = clEnqueueTask(command_queue,
+                                        *kernel, 1, &event_copytotexture, &event_exec);
+                }
+            }
+
+            ret |= clEnqueueReleaseGLObjects (command_queue,
+                                              1, (cl_mem *)&outbuf,
+                                              1, &event_exec, &event_release);
+            clFinish(command_queue);
+        } else {
+            if(kernel != NULL) {
+                // No wait list: event_uploadvram[2] is never produced by
+                // any command in this class, and the VRAM upload above
+                // has already been completed by clFinish().
+                if(bCLSparse) {
+                    ret = clEnqueueNDRangeKernel(command_queue, *kernel, 1,
+                                                 goff, gws, lws,
+                                                 0, NULL, &event_exec);
+                } else {
+                    ret = clEnqueueTask(command_queue,
+                                        *kernel, 0, NULL, &event_exec);
+                }
+            }
+        }
+    }
+
+    return ret;
+}
+
+
+/*
+ * Run the table-building kernel once, with the table buffer and a page
+ * count of 6 as arguments, and wait for the queue to drain.
+ *
+ * Returns CL_INVALID_KERNEL when no table kernel was found, otherwise
+ * the OR of the statuses of the argument setup and enqueue calls.
+ */
+cl_int GLCLDraw::SetupTable(void)
+{
+    cl_uint pages = 6;
+    cl_int status;
+
+    if(kernel_table == NULL) {
+        return CL_INVALID_KERNEL;
+    }
+
+    status = 0;
+    status |= clSetKernelArg(*kernel_table, 0, sizeof(cl_mem), (void *)&table);
+    status |= clSetKernelArg(*kernel_table, 1, sizeof(cl_uint), (void *)&pages);
+    status |= clEnqueueTask(command_queue,
+                            *kernel_table, 0, NULL, NULL);
+    clFinish(command_queue);
+    return status;
+}
+
+
+/* Map a buffer for writing, fill it with zero bytes and unmap it again. */
+static void CL_ZeroFillBuffer(cl_command_queue queue, cl_mem buf, size_t nbytes)
+{
+    cl_int r = CL_SUCCESS;
+    void *p;
+
+    p = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_WRITE,
+                           0, nbytes,
+                           0, NULL, NULL, &r);
+    if((r >= CL_SUCCESS) && (p != NULL)) {
+        memset(p, 0x00, nbytes);
+        clEnqueueUnmapMemObject(queue, buf, p, 0, NULL, NULL);
+        clFinish(queue);
+    }
+}
+
+/*
+ * Create all device buffers used by the renderer.
+ *
+ * texid : unused by this function.
+ *
+ * Creates and zero-fills the two input banks and the two palette banks,
+ * maps input bank 0 as TransferBuffer and palette bank 0 as palettebuf,
+ * resets the palette history, creates the lookup table buffer and
+ * finally the output buffer. The output buffer is created from a GL
+ * pixel-unpack buffer when CL/GL sharing is enabled and usable;
+ * otherwise (or when that fails) sharing is switched off and a plain
+ * CL buffer is used.
+ *
+ * Returns the OR of the statuses of all buffer creations (0 when every
+ * creation succeeded).
+ */
+cl_int GLCLDraw::SetupBuffer(GLuint *texid)
+{
+    cl_int ret = 0;
+    cl_int r = 0;
+    unsigned int size = 640 * 400 * sizeof(cl_uchar4);
+    int i;
+
+    (void)texid;
+
+    inbuf_bank = 0;
+    for(i = 0; i < 2; i++) {
+        inbuf[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | 0, // Reduce HOST-CPU usage.
+                                  (size_t)(0x2000 * 18 * sizeof(Uint8)), NULL, &r);
+        ret |= r;
+        if(r == CL_SUCCESS) {
+            CL_ZeroFillBuffer(command_queue, inbuf[i], (size_t)(0x2000 * 18 * sizeof(Uint8)));
+        }
+        XM7_DebugLog(XM7_LOG_INFO, "CL: Alloc STS: inbuf[%d] : %d", i, r);
+
+        palette_buf[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | 0, // Reduce HOST-CPU usage.
+                                        (size_t)sizeof(struct palettebuf_t), NULL, &r);
+        // Accumulate: plain assignment would discard earlier failures.
+        ret |= r;
+        if(r == CL_SUCCESS) {
+            CL_ZeroFillBuffer(command_queue, palette_buf[i], (size_t)sizeof(struct palettebuf_t));
+        }
+        XM7_DebugLog(XM7_LOG_INFO, "CL: Alloc STS: palette_buf[%d] : %d", i, r);
+    }
+    TransferBuffer = MapTransferBuffer(SCR_262144);
+    // Bank 0 is the one mapped for the host; CopyPalette() unmaps
+    // palette_buf[palette_bank], so keep the index in step.
+    palette_bank = 0;
+    palettebuf = (struct palettebuf_t *)clEnqueueMapBuffer(command_queue, palette_buf[0], CL_TRUE, CL_MAP_WRITE,
+                                                           0, (size_t)sizeof(struct palettebuf_t),
+                                                           0, NULL, NULL, &r);
+    if(r < CL_SUCCESS) palettebuf = NULL;
+    ResetPalette();
+    table = clCreateBuffer(context, CL_MEM_READ_WRITE | 0,
+                           (size_t)(0x100 * 8 * 6 * sizeof(cl_uint)), NULL, &r);
+    ret |= r;
+    XM7_DebugLog(XM7_LOG_INFO, "CL: Alloc STS: table : %d", r);
+
+    // Changed from writing the texture directly to going through a PBO (2012-11-02).
+    if((bCLEnableKhrGLShare != 0) && (bGL_PIXEL_UNPACK_BUFFER_BINDING != FALSE)) {
+        glGenBuffers(1, &pbo);
+        if(pbo != 0) {
+            glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
+            glBufferData(GL_PIXEL_UNPACK_BUFFER, size, NULL, GL_STREAM_DRAW);
+            outbuf = clCreateFromGLBuffer(context, CL_MEM_WRITE_ONLY | 0,
+                                          pbo, &r);
+            glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+            // r is an OpenCL status, so compare against CL_SUCCESS.
+            if(r == CL_SUCCESS) {
+                ret |= r;
+                XM7_DebugLog(XM7_LOG_INFO, "CL: Alloc STS: outbuf (GLCL Interop): %d", r);
+                return ret;
+            }
+            glDeleteBuffers(1, &pbo);
+        }
+        // Sharing could not be set up: fall back to a plain CL buffer.
+        bCLEnableKhrGLShare = FALSE;
+    }
+
+    outbuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+                            (size_t)(640 * 400 * sizeof(Uint32)), NULL, &r);
+    ret |= r;
+    XM7_DebugLog(XM7_LOG_INFO, "CL: Alloc STS: outbuf (CL): %d", r);
+    return ret;
+}
+
+/* GL buffer object name stored in pbo (0 when none was generated). */
+GLuint GLCLDraw::GetPbo(void)
+{
+    const GLuint name = pbo;
+    return name;
+}
--- /dev/null
+/*
+ * Header for CL with GL
+ * (C) 2012 K.Ohta
+ * Notes:
+ * Not CL model: VramDraw->[Virtual Vram]->AGEventDraw2->drawUpdateTexture->[GL Texture]->Drawing
+ * CL Model: AGEventDraw2 -> GLCL_DrawEventSub -> [GL/CL Texture] ->Drawing
+ * History:
+ * Nov 01,2012: Initial.
+ */
+#pragma once
+
+#include <SDL/SDL.h>
+#include <agar/core.h>
+
+#ifdef _WINDOWS
+#include <GL/gl.h>
+#include <GL/glext.h>
+#else
+#include <GL/glx.h>
+#include <GL/glxext.h>
+#endif
+
+#ifdef _USE_OPENCL
+/*
+ * These switches are evaluated by the OpenCL headers while they are
+ * being included, so they must be defined BEFORE <CL/cl.h>.
+ */
+#if 1
+#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
+#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
+#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
+#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
+#endif
+
+#include <CL/cl.h>
+#include <CL/cl_gl.h>
+#include <CL/cl_gl_ext.h>
+
+/* Emulator globals with C linkage that are read by the palette code. */
+extern "C" {
+    #include "xm7_types.h"
+    extern BYTE ttl_palet[8];       /* digital palette, 8 entries */
+    extern BYTE apalet_b[4096];     /* analog palette, blue  */
+    extern BYTE apalet_r[4096];     /* analog palette, red   */
+    extern BYTE apalet_g[4096];     /* analog palette, green */
+    extern BYTE multi_page;
+}
+
+extern GLuint uVramTextureID;
+
+struct apalettetbl_t { /* One analog-palette entry: a raster line plus a copy of the 4096-entry palette. */
+ Uint8 line_h; /* line / 256 */
+ Uint8 line_l; /* line % 256 */
+ Uint8 mpage; /* multi-page value stored with the entry */
+ Uint8 r_4096[4096]; /* copy of apalet_r[] */
+ Uint8 g_4096[4096]; /* copy of apalet_g[] */
+ Uint8 b_4096[4096]; /* copy of apalet_b[] */
+} __attribute__((packed)); /* packed: CopyPalette() copies entries by byte offset; presumably mirrored by the CL kernel source - confirm */
+
+struct dpalettetbl_t { /* One digital-palette entry: a raster line plus a copy of the 8-entry palette. */
+ Uint8 line_h; /* line / 256 */
+ Uint8 line_l; /* line % 256 */
+ Uint8 mpage; /* multi-page value stored with the entry */
+ Uint8 tbl[8]; /* copy of ttl_palet[] */
+}__attribute__((packed)); /* packed: CopyPalette() copies entries by byte offset */
+
+struct palettebuf_t { /* Contents of one palette bank: two 16-bit entry counts followed by both tables. */
+ Uint8 alines_h; /* number of used atbls[] entries, / 256 */
+ Uint8 alines_l; /* number of used atbls[] entries, % 256 */
+ Uint8 dlines_h; /* number of used dtbls[] entries, / 256 */
+ Uint8 dlines_l; /* number of used dtbls[] entries, % 256 */
+ struct apalettetbl_t atbls[200]; /* analog-palette entries */
+ struct dpalettetbl_t dtbls[400]; /* digital-palette entries */
+}__attribute__((packed)); /* packed: CopyPalette() relies on atbls[] starting right after the 4 count bytes */
+
+
+/*
+ * OpenCL (optionally CL/GL shared) renderer.
+ *
+ * Typical call order, as implemented by the member functions:
+ * InitContext() -> BuildFromSource() -> SetupBuffer() -> SetupTable(),
+ * then GetVram() per frame. The object owns the OpenCL context, queue,
+ * program, kernels and buffers and releases them in its destructor, so
+ * it must not be copied.
+ */
+class GLCLDraw {
+ public:
+    GLCLDraw();
+    ~GLCLDraw();
+    // Owns OpenCL handles that are released in the destructor: a copy
+    // would release them twice.
+    GLCLDraw(const GLCLDraw &) = delete;
+    GLCLDraw &operator=(const GLCLDraw &) = delete;
+
+    cl_int GetVram(int bmode);
+    cl_int BuildFromSource(const char *p);
+    cl_int SetupBuffer(GLuint *texid);
+    cl_int SetupTable(void);
+    cl_int InitContext(int platformnum, int processornum, int GLinterop);
+    int GetPlatforms(void);
+    int GetUsingDeviceNo(void);
+    int GetDevices(void);
+    void GetDeviceType(char *str, int maxlen, int num);
+    void GetDeviceName(char *str, int maxlen, int num);
+    Uint8 *MapTransferBuffer(int bmode);
+    cl_int UnMapTransferBuffer(Uint8 *p);
+    GLuint GetPbo(void);
+    int GetGLEnabled(void);
+    Uint32 *GetPixelBuffer(void);
+    int ReleasePixelBuffer(Uint32 *p);
+    Uint8 *GetBufPtr(Uint32 timeout);
+    void ReleaseBufPtr(void);
+    void AddPalette(int line, Uint8 mpage, BOOL analog);
+    void ResetPalette(void);
+    void CopyPalette(void);
+    cl_context context = NULL;
+    cl_command_queue command_queue = NULL;
+
+    /* Program Object */
+    const char *source = NULL;
+    cl_program program = NULL;
+    cl_int ret_num_devices = 0;
+    cl_int ret_num_platforms = 0;
+    cl_int platform_num = 0;
+    cl_platform_id platform_id[8];
+    cl_device_id device_id[8];
+
+ private:
+    // Both declarations need an explicit return type; the build callback
+    // type matches the pfn_notify parameter of clBuildProgram().
+    void CL_CALLBACK LogProgramExecute(cl_program program, void *userdata);
+    void (CL_CALLBACK *build_callback)(cl_program, void *) = NULL;
+    int w2 = 0;
+    int h2 = 0;
+    cl_event event_exec = NULL;
+    cl_event event_uploadvram[4] = {NULL, NULL, NULL, NULL};
+    cl_event event_copytotexture = NULL;
+    cl_event event_release = NULL;
+    cl_kernel kernels_array[16];
+    cl_kernel *kernel_8colors = NULL;
+    cl_kernel *kernel_4096colors = NULL;
+    cl_kernel *kernel_256kcolors = NULL;
+    cl_kernel *kernel_table = NULL;
+    cl_kernel *kernel_copyvram = NULL;
+    cl_uint nkernels = 0;
+
+    int inbuf_bank = 0;
+    int palette_bank = 0;
+    int palette_bank_old = 0;
+    cl_mem inbuf[2] = {NULL, NULL};
+    cl_mem palette_buf[2] = {NULL, NULL};
+    cl_mem outbuf = NULL;
+    cl_mem internalpal = NULL;
+    cl_mem table = NULL;
+    cl_context_properties *properties = NULL;
+    GLuint pbo = 0;
+    int lastline = 0;
+    int using_device = 0;
+    int bCLEnableKhrGLShare = 0;
+    Uint32 *pixelBuffer = NULL;
+    Uint8 *TransferBuffer = NULL;
+    struct palettebuf_t *palettebuf = NULL;
+    int bModeOld = -1;
+    cl_device_type device_type[8];
+    cl_ulong local_memsize[8];
+    AG_Mutex mutex_buffer;
+    AG_Mutex mutex_palette;
+};
+
+/* Kernel selector indexes; CLKERNEL_END is the number of selectors. */
+enum {
+    CLKERNEL_8    = 0,
+    CLKERNEL_4096 = 1,
+    CLKERNEL_256K = 2,
+    CLKERNEL_END  = 3
+};
+
+#endif /* _USE_OPENCL */
--- /dev/null
+/*\r
+ * agar_gldraw.h\r
+ *\r
+ * Created on: 2011/01/21\r
+ * Author: whatisthis\r
+ */\r
+\r
+#ifndef AGAR_GLDRAW_H_\r
+#define AGAR_GLDRAW_H_\r
+\r
+#include <agar/core/types.h>\r
+#include <agar/core.h>\r
+#include <agar/gui.h>\r
+#include "api_draw.h"\r
+#include "api_vram.h"\r
+\r
+#include "agar_vramutil.h"\r
+#include "agar_draw.h"\r
+#include "agar_glutil.h"\r
+\r
+extern BOOL EventSDL(AG_Driver *drv);\r
+extern BOOL EventGUI(AG_Driver *drv);\r
+\r
+#ifdef USE_OPENGL\r
+extern void DrawOSDGL(AG_GLView *w);\r
+\r
+extern void AGEventScaleGL(AG_Event *event);\r
+extern void AGEventDrawGL(AG_Event *event);\r
+\r
+extern void AGEventOverlayGL(AG_Event *event);\r
+extern void AGEventMouseMove_AG_GL(AG_Event *event);\r
+extern void AGEventKeyRelease_AG_GL(AG_Event *event);\r
+extern void AGEventKeyPress_AG_GL(AG_Event *event);\r
+\r
+extern void InitGL_AG_GL(int w, int h);\r
+extern void Detach_AG_GL();\r
+/*\r
+ * agar_gldraw2.cpp\r
+ */\r
+extern void InitGL_AG2(int w, int h);\r
+extern void DetachGL_AG2(void);\r
+\r
+extern void AGEventDrawGL2(AG_Event *event);\r
+extern void AGEventKeyUpGL(AG_Event *event);\r
+extern void AGEventKeyDownGL(AG_Event *event);\r
+\r
+extern GLuint uVramTextureID;\r
+extern GLuint uNullTextureID;\r
+#endif /* USE_OPENGL */\r
+#endif /* AGAR_GLDRAW_H_ */\r
--- /dev/null
+/*
+ * agar_gldraw2.cpp
+ * Using Indexed palette @8Colors.
+ * (c) 2011 K.Ohta <whatisthis.sowhat@gmail.com>
+ */
+
+#include <agar/core.h>
+#include <agar/core/types.h>
+#include <agar/gui.h>
+
+#include <SDL/SDL.h>
+#ifdef _WINDOWS
+#include <GL/gl.h>
+#include <GL/glext.h>
+#else
+#include <GL/glx.h>
+#include <GL/glxext.h>
+#endif
+#ifdef _USE_OPENCL
+# include "agar_glcl.h"
+#endif
+
+#ifdef USE_OPENMP
+#include <omp.h>
+#endif //_OPENMP
+
+#include "api_draw.h"
+//#include "api_scaler.h"
+#include "api_kbd.h"
+
+#include "agar_xm7.h"
+#include "agar_draw.h"
+#include "agar_gldraw.h"
+#include "agar_cfg.h"
+#include "xm7.h"
+#include "display.h"
+#include "subctrl.h"
+#include "device.h"
+
+
+GLuint uVramTextureID;
+GLuint uNullTextureID;
+#ifdef _USE_OPENCL
+extern class GLCLDraw *cldraw;
+#endif
+// InitContextCL() is defined in every build configuration (it only clears the
+// CL flags when OpenCL is compiled out) and AGEventDrawGL2() calls it
+// unconditionally, so it must be declared outside the _USE_OPENCL guard.
+extern void InitContextCL(void);
+
+extern void InitGL_AG2(int w, int h);
+extern void DetachGL_AG2(void);
+
+// Grids
+// These are defined as arrays (GLfloat name[N * 6]) in agar_glutil.cpp.
+// Declaring them as pointers would make this file read the first bytes of
+// the vertex data as an address, so they are declared as arrays here too.
+extern GLfloat GridVertexs200l[];
+extern GLfloat GridVertexs400l[];
+
+// Brights
+float fBrightR;
+float fBrightG;
+float fBrightB;
+
+
+void SetBrightRGB_AG_GL2(float r, float g, float b)
+{
+    // Record the per-channel brightness factors used by the draw handler.
+    fBrightB = b;
+    fBrightG = g;
+    fBrightR = r;
+
+    // Flag the screen as changed so the next draw event refreshes it.
+    SDLDrawFlag.Drawn = TRUE;
+}
+
+
+
+/*
+ * Event Functions
+ */
+
+void AGEventOverlayGL(AG_Event *event)
+{
+    // Overlay handler stub: it only resolves the widget the event was sent
+    // to and draws nothing.
+    AG_GLView *view = (AG_GLView *)AG_SELF();
+}
+
+
+void AGEventScaleGL(AG_Event *event)
+{
+    // Scale handler: make the GL viewport match the widget's view rectangle.
+    AG_GLView *view = (AG_GLView *)AG_SELF();
+
+    glViewport(view->wid.rView.x1,
+               view->wid.rView.y1,
+               view->wid.rView.w,
+               view->wid.rView.h);
+    //glLoadIdentity();
+    //glOrtho(-1.0, 1.0, 1.0, -1.0, -1.0, 1.0);
+}
+
+
+static void drawGrids(void *pg,int w, int h)
+{
+    // Stub: converts the opaque argument to the GL view and does nothing
+    // else; w and h are not used.
+    AG_GLView *view = (AG_GLView *)pg;
+}
+
+
+/*
+ * Upload the current emulated screen into the VRAM texture (uVramTextureID).
+ *
+ * p       : CPU-side pixel buffer used by the non-OpenCL path.
+ * w, h    : size of the region handed to glTexSubImage2D (the non-OpenCL
+ *           path always uploads 640 pixels per row).
+ * crtflag : when FALSE the non-OpenCL path skips the upload but still
+ *           clears the dirty flags.
+ *
+ * Does nothing when the texture has not been created yet. All work is done
+ * between LockVram() and UnlockVram().
+ */
+static void drawUpdateTexture(Uint32 *p, int w, int h, BOOL crtflag)
+{
+ if(uVramTextureID != 0){
+ // NOTE(review): pu, pq, xx, yy, ofset are never used and ww/hh are only
+ // assigned; they look like leftovers.
+ Uint32 *pu;
+ Uint32 *pq;
+ int xx;
+ int yy;
+ int ww;
+ int hh;
+ int ofset;
+ BOOL flag;
+ int i;
+ // glPushAttrib(GL_TEXTURE_BIT);
+ ww = w >> 3;
+ hh = h >> 3;
+
+#ifdef _USE_OPENCL
+ if((cldraw != NULL) && (bCLEnabled)) {
+ // OpenCL path: the CL renderer converts VRAM, then the result is copied
+ // into the texture.
+ cl_int ret = CL_SUCCESS;
+ LockVram();
+ // Anything dirty? Either a per-line flag or the global "drawn" flag.
+ flag = FALSE;
+ for(i = 0; i < h; i++) {
+ if(bDrawLine[i]) {
+ flag = TRUE;
+ }
+ }
+ if(SDLDrawFlag.Drawn) flag = TRUE;
+ if(flag) {
+ ret = cldraw->GetVram(bModeOld);
+ for(i = 0; i < h; i++) bDrawLine[i] = FALSE;
+
+ if(ret != CL_SUCCESS) {
+ // CL rendering failed: drop the dirty flags and leave the texture as is.
+ SDLDrawFlag.Drawn = FALSE;
+ bPaletFlag = FALSE;
+ glBindTexture(GL_TEXTURE_2D, 0);
+ UnlockVram();
+ return;
+ }
+ }
+ // The texture upload below runs on every call, even when nothing was dirty.
+ if(bCLGLInterop){
+ // With the unpack buffer bound, the NULL data pointer means "offset 0
+ // inside the bound buffer".
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, cldraw->GetPbo());
+ glBindTexture(GL_TEXTURE_2D, uVramTextureID);
+ // Copy pbo to texture
+ glTexSubImage2D(GL_TEXTURE_2D,
+ 0,
+ 0,
+ 0,
+ w,
+ h,
+ GL_RGBA,
+ GL_UNSIGNED_BYTE,
+ NULL);
+ glFinish();
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ glBindTexture(GL_TEXTURE_2D, 0);
+ glFinish();
+ } else { // Not interoperability with GL
+ Uint32 *pp;
+ pp = cldraw->GetPixelBuffer();
+ glBindTexture(GL_TEXTURE_2D, uVramTextureID);
+ if(pp != NULL) glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0,
+ w, h, GL_RGBA, GL_UNSIGNED_BYTE, pp);
+ glFinish();
+ // NOTE(review): called even when pp is NULL - confirm ReleasePixelBuffer accepts that.
+ cldraw->ReleasePixelBuffer(pp);
+ glBindTexture(GL_TEXTURE_2D, 0);
+ glFinish();
+ }
+ SDLDrawFlag.Drawn = FALSE;
+ bPaletFlag = FALSE;
+ UnlockVram();
+ } else {
+#endif
+ // CPU path: upload the caller's pixel buffer when something changed.
+ LockVram();
+ flag = FALSE;
+ for(i = 0; i < h; i++) {
+ if(bDrawLine[i]) {
+ flag = TRUE;
+ bDrawLine[i] = FALSE;
+ }
+ }
+ flag |= SDLDrawFlag.Drawn;
+ if((p != NULL) && (flag)) {
+ if(crtflag != FALSE) {
+ glBindTexture(GL_TEXTURE_2D, uVramTextureID);
+ glTexSubImage2D(GL_TEXTURE_2D,
+ 0,
+ 0,
+ 0,
+ 640,
+ h,
+ GL_RGBA,
+ GL_UNSIGNED_BYTE,
+ p);
+ glFinish();
+ glBindTexture(GL_TEXTURE_2D, 0); // 20111023 suppress flicker etc.
+ }
+ bPaletFlag = FALSE;
+ SDLDrawFlag.Drawn = FALSE;
+ }
+ UnlockVram();
+#ifdef _USE_OPENCL
+ }
+#endif
+ }
+}
+
+
+
+
+
+
+/*
+ * "Draw" event handler
+ */
+
+/*
+ * "Draw" event handler of the GL view.
+ *
+ * Refreshes the VRAM texture, draws it on a full-view quad, applies the
+ * brightness factors with a multiplicative blend pass (non-OpenCL rendering
+ * only), draws the scanline grid when the view is at least twice as tall as
+ * the emulated screen, and finally draws the OSD.
+ */
+void AGEventDrawGL2(AG_Event *event)
+{
+    AG_GLView *glv = (AG_GLView *)AG_SELF();
+    int w;
+    int h;
+    int y;
+    float yf;
+    Uint32 *p;
+    GLfloat TexCoords[4][2];
+    GLfloat Vertexs[4][3];
+    BOOL crtflag = crt_flag;
+
+    p = pVram2;
+    if((p == NULL) && (bCLEnabled == FALSE)) return;
+
+    // Texture coordinates are fractions of the 640x400 texture created below.
+    TexCoords[0][0] = TexCoords[3][0] = 0.0f; // Xbegin
+    TexCoords[0][1] = TexCoords[1][1] = 0.0f; // Ybegin
+    switch(bModeOld) {
+    case SCR_400LINE:
+        w = 640;
+        h = 400;
+        TexCoords[2][0] = TexCoords[1][0] = 640.0f / 640.0f; // Xend
+        TexCoords[2][1] = TexCoords[3][1] = 399.0f / 400.0f; // Yend
+        break;
+    case SCR_200LINE:
+        w = 640;
+        h = 200;
+        TexCoords[2][0] = TexCoords[1][0] = 640.0f / 640.0f; // Xend
+        TexCoords[2][1] = TexCoords[3][1] = 199.0f / 400.0f; // Yend
+        break;
+    case SCR_262144:
+    case SCR_4096:
+    default:
+        w = 320;
+        h = 200;
+        TexCoords[2][0] = TexCoords[1][0] = 320.0f / 640.0f; // Xend
+        TexCoords[2][1] = TexCoords[3][1] = 199.0f / 400.0f; // Yend
+        break;
+    }
+
+    // Full-view quad in normalized device coordinates.
+    Vertexs[0][2] = Vertexs[1][2] = Vertexs[2][2] = Vertexs[3][2] = -0.98f;
+    Vertexs[0][0] = Vertexs[3][0] = -1.0f; // Xbegin
+    Vertexs[0][1] = Vertexs[1][1] = 1.0f;  // Yend
+    Vertexs[2][0] = Vertexs[1][0] = 1.0f;  // Xend
+    Vertexs[2][1] = Vertexs[3][1] = -1.0f; // Ybegin
+
+    if(uVramTextureID == 0) uVramTextureID = CreateNullTexture(640, 400); // prevent dot ghosting
+    if(uNullTextureID == 0) uNullTextureID = CreateNullTexture(640, 400); // prevent dot ghosting
+    /*
+     * 20110904 OOPS! Updating-Texture must be in Draw-Event-Handler(--;
+     */
+
+    glPushAttrib(GL_TEXTURE_BIT);
+    glPushAttrib(GL_TRANSFORM_BIT);
+    glPushAttrib(GL_ENABLE_BIT);
+    InitContextCL();
+
+    glMatrixMode(GL_PROJECTION);
+    glLoadIdentity();
+
+    glEnable(GL_DEPTH_TEST);
+    glDisable(GL_BLEND);
+
+    /*
+     * Show VRAM: a quad with the texture applied. While the mode is
+     * changing or the CRT is off, the blank texture is shown instead.
+     */
+    if((bMode == bModeOld) && (crtflag)){
+        drawUpdateTexture(p, w, h, crtflag);
+        glEnable(GL_TEXTURE_2D);
+        glBindTexture(GL_TEXTURE_2D, uVramTextureID);
+        glColor4f(0.0f, 0.0f, 0.0f, 1.0f);
+    } else {
+        glEnable(GL_TEXTURE_2D);
+        glBindTexture(GL_TEXTURE_2D, uNullTextureID);
+        glColor4f(1.0f, 1.0f, 1.0f, 1.0f);
+    }
+    if(!bSmoosing) {
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    } else {
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    }
+    if(bGL_EXT_VERTEX_ARRAY) {
+        glEnable(GL_TEXTURE_COORD_ARRAY_EXT);
+        glEnable(GL_VERTEX_ARRAY_EXT);
+
+        glTexCoordPointerEXT(2, GL_FLOAT, 0, 4, TexCoords);
+        glVertexPointerEXT(3, GL_FLOAT, 0, 4, Vertexs);
+        glDrawArraysEXT(GL_POLYGON, 0, 4);
+
+        glDisable(GL_VERTEX_ARRAY_EXT);
+        glDisable(GL_TEXTURE_COORD_ARRAY_EXT);
+    } else {
+        glBegin(GL_POLYGON);
+        for(y = 0; y < 4; y++) {
+            glTexCoord2f(TexCoords[y][0], TexCoords[y][1]);
+            glVertex3f(Vertexs[y][0], Vertexs[y][1], Vertexs[y][2]);
+        }
+        glEnd();
+    }
+
+    // 20120502 brightness adjustment
+    glBindTexture(GL_TEXTURE_2D, 0); // 20111023
+    glDisable(GL_TEXTURE_2D);
+    glDisable(GL_DEPTH_TEST);
+
+    if(bCLEnabled == FALSE){
+        // Multiply the framebuffer by (fBrightR, fBrightG, fBrightB):
+        // result = dst * src_color.
+        glEnable(GL_BLEND);
+
+        glColor3f(fBrightR , fBrightG, fBrightB);
+        glBlendFunc(GL_ZERO, GL_SRC_COLOR);
+
+        if(bGL_EXT_VERTEX_ARRAY) {
+            glEnable(GL_VERTEX_ARRAY_EXT);
+            glVertexPointerEXT(3, GL_FLOAT, 0, 4, Vertexs);
+            glDrawArraysEXT(GL_POLYGON, 0, 4);
+            glDisable(GL_VERTEX_ARRAY_EXT);
+        } else {
+            glBegin(GL_POLYGON);
+            for(y = 0; y < 4; y++) {
+                glVertex3f(Vertexs[y][0], Vertexs[y][1], Vertexs[y][2]);
+            }
+            glEnd();
+        }
+
+        glBlendFunc(GL_ONE, GL_ZERO);
+
+        glDisable(GL_BLEND);
+    }
+    glDisable(GL_TEXTURE_2D);
+    glDisable(GL_DEPTH_TEST);
+
+    // Scanline grid: one black horizontal line per emulated raster.
+    if((glv->wid.rView.h >= (h * 2)) && (bFullScan == 0)) {
+        glLineWidth((float)(glv->wid.rView.h) / (float)(h * 2));
+        glColor4f(0.0f, 0.0f, 0.0f, 1.0f);
+        // Always drawn in immediate mode. The former vertex-array variant
+        // passed GL_LINE (a polygon-mode enum, not a primitive type) and a
+        // vertex count of h + 1 to glDrawArraysEXT, so it never rendered
+        // the grid; this path is the one known to work.
+        glBegin(GL_LINES);
+        for(y = 0; y < h; y++) {
+            yf = -1.0f + (float) (y + 1) * 2.0f / (float)h;
+            glVertex3f(-1.0f, yf, 0.96f);
+            glVertex3f(+1.0f, yf, 0.96f);
+        }
+        glEnd();
+    }
+    glDisable(GL_BLEND);
+    glDisable(GL_TEXTURE_2D);
+    glDisable(GL_DEPTH_TEST);
+#ifdef USE_OPENGL
+    DrawOSDGL(glv);
+#endif
+    glPopAttrib();
+    glPopAttrib();
+    glPopAttrib();
+    glFlush();
+}
+
+void AGEventKeyUpGL(AG_Event *event)
+{
+    // Forward a key-release event (key symbol, modifier state, character
+    // code taken from event arguments 1..3) to the emulator keyboard layer.
+    const int keysym = AG_INT(1);
+    const int modifiers = AG_INT(2);
+    const Uint32 unicode = AG_ULONG(3);
+
+    OnKeyReleaseAG(keysym, modifiers, unicode);
+}
+
+void AGEventKeyDownGL(AG_Event *event)
+{
+    // Forward a key-press event (key symbol, modifier state, character
+    // code taken from event arguments 1..3) to the emulator keyboard layer.
+    const int keysym = AG_INT(1);
+    const int modifiers = AG_INT(2);
+    const Uint32 unicode = AG_ULONG(3);
+
+    OnKeyPressAG(keysym, modifiers, unicode);
+}
--- /dev/null
+/*
+ * Agar: OpenGLUtils
+ * (C) 2011 K.Ohta <whatisthis.sowhat@gmail.com>
+ */
+
+
+#include "agar_glutil.h"
+#ifdef _USE_OPENCL
+#include "agar_glcl.h"
+#endif
+#include "agar_logger.h"
+
+#ifdef USE_OPENMP
+#include <omp.h>
+#endif //_OPENMP
+
+extern "C" {
+ AG_GLView *GLDrawArea;
+ BOOL bInitCL = FALSE;
+ BOOL bCLEnabled = FALSE;
+ BOOL bCLGLInterop = FALSE;
+ int nCLGlobalWorkThreads = 10;
+ BOOL bCLSparse = FALSE; // TRUE=Multi threaded CL,FALSE = Single Thread.
+ int nCLPlatformNum;
+ int nCLDeviceNum;
+ BOOL bCLInteropGL;
+ extern BOOL bUseOpenCL;
+}
+
+GLfloat GridVertexs200l[202 * 6];
+GLfloat GridVertexs400l[402 * 6];
+
+// Brights
+extern float fBrightR;
+extern float fBrightG;
+extern float fBrightB;
+extern const char *cl_render;
+extern GLuint uVramTextureID;
+
+#ifdef _USE_OPENCL
+class GLCLDraw *cldraw = NULL;
+#endif
+
+
+
+/*
+ * Create an all-zero RGBA texture of w x (h + 2) texels and return its GL
+ * name, or 0 when the temporary clear buffer cannot be allocated.
+ * The new texture is left bound to GL_TEXTURE_2D.
+ */
+GLuint CreateNullTexture(int w, int h)
+{
+    GLuint texName;
+    const size_t clearBytes = (w + 2) * (h + 2) * sizeof(Uint32);
+    Uint32 *clearPixels = (Uint32 *)malloc(clearBytes);
+
+    if(clearPixels == NULL) {
+        return 0;
+    }
+    memset(clearPixels, 0x00, clearBytes);
+
+    glGenTextures(1, &texName);
+    glBindTexture(GL_TEXTURE_2D, texName);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); // Limit mipmap level , reduce resources.
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,
+                 w, h + 2, 0,
+                 GL_RGBA, GL_UNSIGNED_BYTE, clearPixels);
+
+    free(clearPixels);
+    return texName;
+}
+
+/*
+ * Create an all-zero unsigned-integer RGBA8 texture (GL_RGBA8UI) of
+ * w x (h + 2) texels and return its GL name, or 0 when the temporary clear
+ * buffer cannot be allocated. The texture is unbound again before returning.
+ */
+GLuint CreateNullTextureCL(int w, int h)
+{
+    GLuint texName;
+    const size_t clearBytes = (w + 2) * (h + 2) * sizeof(Uint32);
+    Uint32 *clearPixels = (Uint32 *)malloc(clearBytes);
+
+    if(clearPixels == NULL) {
+        return 0;
+    }
+    memset(clearPixels, 0x00, clearBytes);
+
+    glGenTextures(1, &texName);
+    glBindTexture(GL_TEXTURE_2D, texName);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); // Limit mipmap level , reduce resources.
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI,
+                 w, h + 2, 0,
+                 GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, clearPixels);
+    glBindTexture(GL_TEXTURE_2D, 0);
+
+    free(clearPixels);
+    return texName;
+}
+
+
+void Flip_AG_GL(void)
+{
+    // Nothing is flipped here; the body is only the "video not initialized"
+    // guard.
+    if(!InitVideo) {
+        return;
+    }
+}
+
+void DiscardTextures(int n, GLuint *id)
+{
+    // Delete n texture names, but only while both the GL draw area and the
+    // Agar driver still exist.
+    if((GLDrawArea == NULL) || (agDriverOps == NULL)) {
+        return;
+    }
+    glDeleteTextures(n, id);
+}
+
+void DiscardTexture(GLuint tid)
+{
+    // Single-texture convenience wrapper around DiscardTextures().
+    GLuint names[1] = { tid };
+
+    DiscardTextures(1, names);
+}
+
+
+/*
+ * One-time OpenCL renderer setup, run once the GL draw area exists.
+ *
+ * On success bCLEnabled becomes TRUE and bCLGLInterop reflects whether the
+ * renderer reports GL sharing; on any failure the renderer object is
+ * destroyed and both flags stay FALSE. bInitCL is set in every case, so the
+ * setup is attempted only once.
+ */
+void InitContextCL(void)
+{
+    if(GLDrawArea == NULL) return; // Context not created yet.
+    if(bInitCL == TRUE) return;    // CL already initialized.
+
+    bCLEnabled = FALSE;
+    bCLGLInterop = FALSE;
+#ifdef _USE_OPENCL
+    if(bUseOpenCL && (cldraw == NULL) &&
+       bGL_PIXEL_UNPACK_BUFFER_BINDING) {
+        cldraw = new GLCLDraw;
+        if(cldraw != NULL) {
+            // Context -> kernel build -> buffers/tables; stop at the first
+            // step that fails.
+            cl_int sts = cldraw->InitContext(nCLPlatformNum, nCLDeviceNum, bCLInteropGL);
+            if(sts == CL_SUCCESS) {
+                sts = cldraw->BuildFromSource(cl_render);
+                XM7_DebugLog(XM7_LOG_DEBUG, "CL: Build KERNEL: STS = %d", sts);
+                if(sts == CL_SUCCESS) {
+                    sts = cldraw->SetupBuffer(&uVramTextureID);
+                    sts |= cldraw->SetupTable();
+                }
+            }
+            if(sts != CL_SUCCESS) {
+                delete cldraw;
+                cldraw = NULL;
+            } else {
+                bCLGLInterop = (cldraw->GetGLEnabled() != 0) ? TRUE : FALSE;
+                bCLEnabled = TRUE;
+            }
+        }
+    }
+#endif // _USE_OPENCL
+    bInitCL = TRUE;
+}
+
+
+/*
+ * Fill p with the end points of h + 1 horizontal grid lines.
+ *
+ * Each line takes six floats: (x, y, z) of the left end followed by
+ * (x, y, z) of the right end, so p must hold at least (h + 1) * 6 floats.
+ * Line y sits at -1 + (y + 1) * 2 / h, which matches the immediate-mode
+ * grid drawn in the draw handler.
+ */
+static void InitGridVertexsSub(GLfloat *p, int h)
+{
+    int y;
+    GLfloat *v = p;
+
+    for(y = 0; y < (h + 1); y++) {
+        const float yf = -1.0f + (float) (y + 1) * 2.0f / (float)h;
+        // left end
+        v[0] = -1.0f;
+        v[1] = yf;
+        v[2] = 0.96f;
+        // right end: must be +1.0f; with -1.0f here both ends coincide and
+        // the line has zero length.
+        v[3] = +1.0f;
+        v[4] = yf;
+        v[5] = 0.96f;
+        v += 6;
+    }
+}
+
+
+void InitGridVertexs(void)
+{
+    // Precompute the scanline grids for the 400-line and 200-line modes.
+    InitGridVertexsSub(GridVertexs400l, 400);
+    InitGridVertexsSub(GridVertexs200l, 200);
+}
+
+
+/*
+ * One-time initialization of the GL renderer state.
+ *
+ * Resets VRAM pointers, texture names and OpenCL settings, sets up the
+ * virtual VRAM, resolves GL entry points and extension flags, precomputes
+ * the scanline grids and resets brightness to 1.0. Returns immediately when
+ * video is already initialized. The w and h arguments are not used.
+ *
+ * The former locals (flags, bpp, rgb_size, ext) and the switch on the
+ * constant bpp were removed: nothing read their results and every case
+ * except the default was unreachable.
+ */
+void InitGL_AG2(int w, int h)
+{
+    (void)w;
+    (void)h;
+
+    if(InitVideo) return;
+    InitVideo = TRUE;
+
+    vram_pb = NULL;
+    vram_pg = NULL;
+    vram_pr = NULL;
+#ifdef _USE_OPENCL
+    cldraw = NULL;
+#endif
+    /*
+     * Acquire GL extensions 20110907-
+     */
+    InitVramSemaphore();
+    uVramTextureID = 0;
+    uNullTextureID = 0;
+    pVram2 = NULL;
+#ifdef _USE_OPENCL
+    bInitCL = FALSE;
+    nCLGlobalWorkThreads = 10;
+    bCLSparse = FALSE; // TRUE=Multi threaded CL,FALSE = Single Thread.
+    nCLPlatformNum = 0;
+    nCLDeviceNum = 0;
+    bCLInteropGL = FALSE;
+    //bCLDirectMapping = FALSE;
+#endif
+    InitVirtualVram();
+    InitFBO();             // Resolve the GL entry points first.
+    InitGLExtensionVars(); // Then set the extension flags (after InitFBO, just in case).
+    InitGridVertexs();     // Initialize the grids.
+
+    fBrightR = 1.0; // Initialize brightness.
+    fBrightG = 1.0;
+    fBrightB = 1.0;
+}
+
+
+extern "C" {
+// OpenGL state variables
+BOOL bGL_ARB_IMAGING; // Is image manipulation available?
+BOOL bGL_ARB_COPY_BUFFER; // In-buffer copy (speed-up!) supported
+BOOL bGL_EXT_INDEX_TEXTURE; // Related to palette mode
+BOOL bGL_EXT_COPY_TEXTURE; // Copy between textures
+BOOL bGL_SGI_COLOR_TABLE; // Palette mode (SGI extension)
+BOOL bGL_SGIS_PIXEL_TEXTURE; // For texture updates
+BOOL bGL_EXT_PACKED_PIXEL; // Speed up updates using packed pixels?
+BOOL bGL_EXT_VERTEX_ARRAY; // Speed up drawing with vertex arrays
+BOOL bGL_EXT_PALETTED_TEXTURE; // Palette mode (yet another extension)
+BOOL bGL_PIXEL_UNPACK_BUFFER_BINDING; // Is a pixel buffer available?
+
+
+// FBO API
+// GL entry points resolved at run time by InitFBO(); NULL until then.
+PFNGLVERTEXPOINTEREXTPROC glVertexPointerEXT;
+PFNGLDRAWARRAYSEXTPROC glDrawArraysEXT;
+PFNGLTEXCOORDPOINTEREXTPROC glTexCoordPointerEXT;
+//#ifndef _WINDOWS
+PFNGLBINDBUFFERPROC glBindBuffer;
+PFNGLBUFFERDATAPROC glBufferData;
+PFNGLGENBUFFERSPROC glGenBuffers;
+PFNGLDELETEBUFFERSPROC glDeleteBuffers;
+//#endif
+
+/*
+ * Return TRUE when the extension name str appears as a complete,
+ * space-delimited token in the GL_EXTENSIONS string, FALSE otherwise
+ * (including NULL/empty str or an unavailable/empty extension string).
+ */
+BOOL QueryGLExtensions(const char *str)
+{
+    if(str == NULL) return FALSE;
+    const int wanted = strlen(str);
+    if(wanted <= 0) return FALSE;
+
+    const char *token = (const char *)glGetString(GL_EXTENSIONS);
+    if(token == NULL) return FALSE;
+    const int total = strlen(token);
+    if(total <= 0) return FALSE;
+
+    // Walk the list token by token; the length test rejects names that are
+    // only a prefix of a longer extension name.
+    int consumed = 0;
+    while(consumed < total) {
+        const int toklen = strcspn(token, " ");
+        if((toklen == wanted) && (strncmp(str, token, toklen) == 0)) {
+            return TRUE;
+        }
+        token += (toklen + 1);
+        consumed += (toklen + 1);
+    }
+    return FALSE;
+}
+
+/*
+ * Set the bGL_* capability flags and clear the OpenCL state flags.
+ *
+ * Most flags come straight from the GL_EXTENSIONS string. The two flags
+ * that guard calls through run-time resolved entry points also require
+ * those pointers to be non-NULL, so a failed lookup in InitFBO() is not
+ * overwritten here and a NULL function pointer is never called.
+ */
+void InitGLExtensionVars(void)
+{
+    bGL_ARB_IMAGING = QueryGLExtensions("GL_ARB_imaging");
+    bGL_ARB_COPY_BUFFER = QueryGLExtensions("GL_ARB_copy_buffer");
+    bGL_EXT_INDEX_TEXTURE = QueryGLExtensions("GL_EXT_index_texture");
+    bGL_EXT_COPY_TEXTURE = QueryGLExtensions("GL_EXT_copy_texture");
+    bGL_SGI_COLOR_TABLE = QueryGLExtensions("GL_SGI_color_table");
+    bGL_SGIS_PIXEL_TEXTURE = QueryGLExtensions("GL_SGIS_pixel_texture");
+    bGL_EXT_PACKED_PIXEL = QueryGLExtensions("GL_EXT_packed_pixel");
+    bGL_EXT_PALETTED_TEXTURE = QueryGLExtensions("GL_EXT_paletted_texture");
+
+    // Vertex arrays: extension advertised AND all three entry points found.
+    bGL_EXT_VERTEX_ARRAY = (QueryGLExtensions("GL_EXT_vertex_array") &&
+                            (glVertexPointerEXT != NULL) &&
+                            (glDrawArraysEXT != NULL) &&
+                            (glTexCoordPointerEXT != NULL)) ? TRUE : FALSE;
+
+// bGL_PIXEL_UNPACK_BUFFER_BINDING = QueryGLExtensions("GL_pixel_unpack_buffer_binding");
+    // Buffer objects: usable only when every buffer entry point was found.
+    bGL_PIXEL_UNPACK_BUFFER_BINDING = ((glBindBuffer != NULL) &&
+                                       (glBufferData != NULL) &&
+                                       (glGenBuffers != NULL) &&
+                                       (glDeleteBuffers != NULL)) ? TRUE : FALSE;
+    bCLEnabled = FALSE;
+    bCLGLInterop = FALSE;
+}
+
+
+#ifdef _WINDOWS
+#include <windef.h>
+extern PROC WINAPI wglGetProcAddress(LPCSTR lpszProc);
+//#else
+//extern void *glXGetProcAddress(const GLubyte *);
+#endif
+
+// Generic function-pointer type used to carry a looked-up GL entry point.
+typedef void (*XM7_GLProcPtr)(void);
+
+// Look up one GL entry point, through SDL when viaSDL is set, otherwise
+// through the platform loader (glX, or wgl on Windows).
+static XM7_GLProcPtr LookupGLProc(BOOL viaSDL, const char *name)
+{
+    if(viaSDL) {
+        return (XM7_GLProcPtr)SDL_GL_GetProcAddress(name);
+    }
+#ifndef _WINDOWS
+    return (XM7_GLProcPtr)glXGetProcAddress((const GLubyte *)name);
+#else
+    return (XM7_GLProcPtr)wglGetProcAddress(name);
+#endif // _WINDOWS
+}
+
+/*
+ * Resolve the vertex-array and buffer-object entry points.
+ * Use SDL for wrapper. 20130128
+ *
+ * A capability flag is cleared when any entry point of its group is
+ * missing; flags are never set to TRUE here.
+ */
+void InitFBO(void)
+{
+    const BOOL viaSDL = AG_UsingSDL(NULL) ? TRUE : FALSE;
+
+    glVertexPointerEXT = (PFNGLVERTEXPOINTEREXTPROC)LookupGLProc(viaSDL, "glVertexPointerEXT");
+    glDrawArraysEXT = (PFNGLDRAWARRAYSEXTPROC)LookupGLProc(viaSDL, "glDrawArraysEXT");
+    glTexCoordPointerEXT = (PFNGLTEXCOORDPOINTEREXTPROC)LookupGLProc(viaSDL, "glTexCoordPointerEXT");
+    glBindBuffer = (PFNGLBINDBUFFERPROC)LookupGLProc(viaSDL, "glBindBuffer");
+    glBufferData = (PFNGLBUFFERDATAPROC)LookupGLProc(viaSDL, "glBufferData");
+    glGenBuffers = (PFNGLGENBUFFERSPROC)LookupGLProc(viaSDL, "glGenBuffers");
+    glDeleteBuffers = (PFNGLDELETEBUFFERSPROC)LookupGLProc(viaSDL, "glDeleteBuffers");
+
+    if((glVertexPointerEXT == NULL) || (glDrawArraysEXT == NULL) ||
+       (glTexCoordPointerEXT == NULL)) {
+        bGL_EXT_VERTEX_ARRAY = FALSE;
+    }
+    if((glBindBuffer == NULL) || (glBufferData == NULL) ||
+       (glGenBuffers == NULL) || (glDeleteBuffers == NULL)) {
+        bGL_PIXEL_UNPACK_BUFFER_BINDING = FALSE;
+    }
+}
+
+}
--- /dev/null
+#ifndef AGAR_GLUTIL_H_INCLUDED\r
+#define AGAR_GLUTIL_H_INCLUDED\r
+\r
+\r
+#include <agar/core.h>\r
+#include <agar/core/types.h>\r
+#include <agar/gui.h>\r
+#include <agar/gui/glview.h>\r
+\r
+#include <SDL/SDL.h>\r
+#ifdef _WINDOWS\r
+#include <GL/gl.h>\r
+#include <GL/glext.h>\r
+#include <SDL/SDL_opengl.h>\r
+#else\r
+#include <GL/glx.h>\r
+#include <GL/glxext.h>\r
+#include <SDL/SDL_opengl.h>\r
+#endif\r
+\r
+#include "api_draw.h"\r
+#include "agar_xm7.h"\r
+#include "agar_vramutil.h"\r
+#include "agar_draw.h"\r
+#include "agar_gldraw.h"\r
+\r
+#ifdef __cplusplus\r
+extern "C" {\r
+#endif\r
+extern AG_GLView *GLDrawArea;\r
+extern BOOL bGL_ARB_IMAGING; // イメージ操作可能か?\r
+extern BOOL bGL_ARB_COPY_BUFFER; // バッファ内コピー(高速化!)サポート\r
+extern BOOL bGL_EXT_INDEX_TEXTURE; // パレットモードに係わる\r
+extern BOOL bGL_EXT_COPY_TEXTURE; // テクスチャ間のコピー\r
+extern BOOL bGL_SGI_COLOR_TABLE; // パレットモード(SGI拡張)\r
+extern BOOL bGL_SGIS_PIXEL_TEXTURE; // テクスチャアップデート用\r
+extern BOOL bGL_EXT_PACKED_PIXEL; // PackedPixelを使ってアップデートを高速化?\r
+extern BOOL bGL_EXT_VERTEX_ARRAY; // 頂点を配列化して描画を高速化\r
+extern BOOL bGL_EXT_PALETTED_TEXTURE; // パレットモード(更に別拡張)\r
+extern BOOL bGL_PIXEL_UNPACK_BUFFER_BINDING; // Pixel buffer\r
+extern BOOL bCLEnabled;\r
+extern BOOL bCLGLInterop;\r
+\r
+// FBO API\r
+extern PFNGLVERTEXPOINTEREXTPROC glVertexPointerEXT;\r
+extern PFNGLDRAWARRAYSEXTPROC glDrawArraysEXT;\r
+extern PFNGLTEXCOORDPOINTEREXTPROC glTexCoordPointerEXT;\r
+#ifndef _WINDOWS\r
+extern PFNGLBINDBUFFERPROC glBindBuffer;\r
+extern PFNGLBUFFERDATAPROC glBufferData;\r
+extern PFNGLGENBUFFERSPROC glGenBuffers;\r
+extern PFNGLDELETEBUFFERSPROC glDeleteBuffers;\r
+#endif\r
+ \r
+extern void InitFBO(void);\r
+ \r
+extern void InitGLExtensionVars(void);\r
+extern BOOL QueryGLExtensions(const char *str);\r
+\r
+#ifdef _USE_OPENCL\r
+extern int nCLGlobalWorkThreads;\r
+extern BOOL bCLSparse; // TRUE=Multi threaded CL,FALSE = Single Thread.\r
+extern BOOL bInitCL;\r
+ //extern BOOL bCLDirectMapping;\r
+#endif // _USE_OPENCL\r
+#ifdef __cplusplus\r
+}\r
+#endif\r
+#ifdef __cplusplus\r
+extern GLuint CreateNullTexture(int w, int h);\r
+extern GLuint CreateNullTextureCL(int w, int h);\r
+\r
+extern void Flip_AG_GL(void);\r
+extern void DiscardTextures(int n, GLuint *id);\r
+extern void DiscardTexture(GLuint id);\r
+#endif\r
+\r
+#endif // AGAR_GLUTIL_H_INCLUDED\r
--- /dev/null
+/*
+ * Log functions
+ * (C) 2014-06-30 K.Ohta
+ * History:
+ * Dec 30, 2014 Move from XM7/SDL, this was Ohta's original code.
+ * Licence : GPLv2
+ */
+
+#include "agar_logger.h"
+
+static int syslog_flag = 0;
+static int log_cons = 0;
+static int log_onoff = 0;
+static int log_opened = FALSE;
+extern "C"
+{
+
+/*
+ * Enable logging.
+ *   syslog : non-zero routes messages to syslog (openlog() is only called
+ *            when the syslog header guard macro is visible).
+ *   cons   : non-zero also prints messages to stdout and requests LOG_CONS.
+ */
+void AGAR_OpenLog(int syslog, int cons)
+ {
+    log_onoff = 1;
+    if(syslog == 0) {
+        syslog_flag = 0;
+    } else {
+        syslog_flag = -1;
+#if defined(_SYS_SYSLOG_H) || defined(_SYSLOG_H)
+        openlog("XM7", ((cons != 0) ? LOG_CONS : 0) | LOG_PID | LOG_NOWAIT, LOG_USER);
+#endif
+    }
+    log_cons = cons;
+    log_opened = TRUE;
+ }
+
+
+/*
+ * Format a printf-style message and send it to the enabled sinks.
+ *
+ *   level : AGAR_LOG_DEBUG / AGAR_LOG_INFO / AGAR_LOG_WARN; anything else
+ *           is logged at debug priority.
+ *   fmt   : printf-style format; the formatted message is truncated to the
+ *           4096-byte buffer.
+ *
+ * Does nothing while logging is switched off. When stdout logging is on the
+ * line is prefixed with "XM7: <local date time>.<microseconds>"; when
+ * syslog logging is on the message is passed to syslog().
+ */
+void AGAR_DebugLog(int level, const char *fmt, ...)
+ {
+    va_list ap;
+    struct tm *timedat;
+    struct timeval tv;
+    char strbuf[4096];
+    char strbuf2[256];
+    char strbuf3[24];
+    int level_flag = LOG_USER;
+
+    if(log_onoff == 0) return;
+    if(fmt == NULL) return;
+
+    switch(level) {
+    case AGAR_LOG_INFO:
+        level_flag |= LOG_INFO;
+        break;
+    case AGAR_LOG_WARN:
+        level_flag |= LOG_WARNING;
+        break;
+    case AGAR_LOG_DEBUG:
+    default:
+        level_flag |= LOG_DEBUG;
+        break;
+    }
+
+    va_start(ap, fmt);
+    vsnprintf(strbuf, sizeof(strbuf), fmt, ap);
+    va_end(ap);
+
+    // One clock sample supplies both the seconds and the microseconds, so
+    // the printed fraction always belongs to the printed second.
+    tv.tv_sec = 0;
+    tv.tv_usec = 0;
+    gettimeofday(&tv, NULL);
+
+    if(log_cons != 0) { // Print only
+        time_t nowtime = tv.tv_sec;
+        timedat = localtime(&nowtime);
+        // localtime() may return NULL; fall back to the bare prefix then.
+        if((timedat == NULL) ||
+           (strftime(strbuf2, sizeof(strbuf2), "XM7: %Y-%m-%d %H:%M:%S", timedat) == 0)) {
+            snprintf(strbuf2, sizeof(strbuf2), "XM7:");
+        }
+        // tv_usec is not an int; cast to long to match the format.
+        snprintf(strbuf3, sizeof(strbuf3), ".%06ld", (long)tv.tv_usec);
+        fprintf(stdout, "%s%s %s\n", strbuf2, strbuf3, strbuf);
+    }
+    if(syslog_flag != 0) { // SYSLOG
+        syslog(level_flag, "uS=%06ld %s", (long)tv.tv_usec, strbuf);
+    }
+ }
+
+// Switch logging on (sw != 0) or off (sw == 0).
+void AGAR_SetLogStatus(int sw)
+ {
+    log_onoff = (sw != 0) ? 1 : 0;
+ }
+
+// Switch the stdout sink on (sw != 0) or off (sw == 0).
+void AGAR_SetLogStdOut(int sw)
+ {
+    log_cons = (sw != 0) ? 1 : 0;
+ }
+
+// Switch the syslog sink on (sw != 0) or off (sw == 0).
+void AGAR_SetLogSysLog(int sw)
+ {
+    syslog_flag = (sw != 0) ? 1 : 0;
+ }
+
+// Report whether the log is open: set by AGAR_OpenLog(), cleared by
+// AGAR_CloseLog().
+BOOL AGAR_LogGetStatus(void)
+ {
+    const BOOL opened = (BOOL) log_opened;
+    return opened;
+ }
+
+
+// Close syslog if it was in use, then reset every logger flag to "off".
+void AGAR_CloseLog(void)
+ {
+    const int usingSyslog = (syslog_flag != 0);
+
+    if(usingSyslog) {
+        closelog();
+    }
+    log_opened = 0;
+    log_onoff = 0;
+    log_cons = 0;
+    syslog_flag = 0;
+ }
+}
+
--- /dev/null
+/*
+ * Log functions
+ * (C) 2014-06-30 K.Ohta
+ *
+ * History:
+ * Dec 30, 2014 Move from XM7/SDL, this was Ohta's original code.
+ * Licence : GPLv2
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include <syslog.h>
+
+#include <time.h>
+#include <sys/time.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ extern void AGAR_OpenLog(int syslog, int cons);
+ extern void AGAR_DebugLog(int level, const char *fmt, ...);
+ extern void AGAR_CloseLog(void);
+ extern void AGAR_SetLogStatus(int sw);
+ extern void AGAR_SetLogSysLog(int sw);
+ extern void AGAR_SetLogStdOut(int sw);
+ extern BOOL AGAR_LogGetStatus(void);
+
+#define AGAR_LOG_ON 1
+#define AGAR_LOG_OFF 0
+
+#define AGAR_LOG_DEBUG 0
+#define AGAR_LOG_INFO 1
+#define AGAR_LOG_WARN 2
+
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+#ifndef TRUE
+#define TRUE (!FALSE)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
\ No newline at end of file
--- /dev/null
+/*
+* FM-7 Emulator "XM7" -> CommonSourceProject
+* Virtual Vram Display(Agar widget version)
+* (C) 2012 K.Ohta <whatisthis.sowhat@gmail.com>
+* History:
+* Jan 18,2012 From demos/customwidget/mywidget.[c|h]
+* Jan 20,2012 Separate subroutines.
+* Dec 30,2014 Move from XM7/SDL, 100% my original file.
+*/
+
+#include "agar_sdlview.h"
+#include "agar_cfg.h"
+#include "api_vram.h"
+#include "api_draw.h"
+//#include "api_scaler.h"
+#include "api_kbd.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern "C" {
+extern struct AGAR_CPUID *pCpuID;
+extern BOOL bUseSIMD;
+}
+
+extern "C" { // Define Headers
+ // scaler/generic
+ extern void pVram2RGB_x05_Line(Uint32 *src, Uint8 *dst, int x, int xend, int y, int yrep); // scaler_x05.c , raster render
+ extern void pVram2RGB_x1_Line(Uint32 *src, Uint8 *dst, int x, int xend, int y, int yrep); // scaler_x1.c , raster render
+ extern void pVram2RGB_x125_Line(Uint32 *src, Uint8 *dst, int x, int xend, int y, int yrep); // scaler_x125.c , raster render
+ extern void pVram2RGB_x15_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x15.c , raster render.
+ extern void pVram2RGB_x2_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x2.c , raster render.
+ extern void pVram2RGB_x225_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x225.c , raster render.
+ extern void pVram2RGB_x25_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x25.c , raster render.
+ extern void pVram2RGB_x3_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x3.c , raster render.
+ extern void pVram2RGB_x4_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x4.c , raster render.
+ extern void pVram2RGB_x45_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x45.c , raster render.
+ extern void pVram2RGB_x5_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x5.c , raster render.
+ extern void pVram2RGB_x6_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x6.c , raster render.
+#if defined(USE_SSE2) // scaler/sse2/
+ extern void pVram2RGB_x1_Line_SSE2(Uint32 *src, Uint8 *dst, int x, int xend, int y, int yrep); // scaler_x1_sse2.c , raster render
+ extern void pVram2RGB_x125_Line_SSE2(Uint32 *src, Uint8 *dst, int x, int xend, int y, int yrep); // scaler_x125_sse2.c , raster render
+ extern void pVram2RGB_x15_Line_SSE2(Uint32 *src, Uint8 *dst, int x, int xend, int y, int yrep); // scaler_x15_sse2.c , raster render
+ extern void pVram2RGB_x2_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x2_sse2.c , raster render.
+ extern void pVram2RGB_x225_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x225_sse2.c , raster render.
+ extern void pVram2RGB_x25_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x25_sse2.c , raster render.
+ extern void pVram2RGB_x3_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x3_sse2.c , raster render.
+ extern void pVram2RGB_x4_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x4_sse2.c , raster render.
+ extern void pVram2RGB_x45_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x45_sse2.c , raster render.
+ extern void pVram2RGB_x5_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x5_sse2.c , raster render.
+ extern void pVram2RGB_x6_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep); // scaler_x6_sse2.c , raster render.
+#endif
+}
+
+static int iScaleFactor = 1;
+static void *pDrawFn = NULL;
+static void *pDrawFn2 = NULL;
+static int iOldW = 0;
+static int iOldH = 0;
+
+
+/*
+ * Blend two packed 32-bit pixels by averaging three 8-bit channels
+ * (each average rounds down) and forcing the remaining byte to 0xff.
+ * Which byte is forced depends on the AG_BIG_ENDIAN branch below.
+ */
+static inline Uint32 pVram_XtoHalf(Uint32 d1, Uint32 d2)
+{
+    Uint16 r, g, b; // channel sums, at most 510 each
+#if AG_BIG_ENDIAN
+    r = (d1 & 0x000000ff) + (d2 & 0x000000ff);
+    g = ((d1 & 0x0000ff00) >> 8) + ((d2 & 0x0000ff00) >> 8);
+    b = ((d1 & 0x00ff0000) >> 16) + ((d2 & 0x00ff0000) >> 16);
+    // Halve each sum, then move it back to its byte position.
+    return 0xff000000
+         | (Uint32)(r >> 1)
+         | ((Uint32)(g >> 1) << 8)
+         | ((Uint32)(b >> 1) << 16);
+#else
+    r = ((d1 & 0xff000000) >> 24) + ((d2 & 0xff000000) >> 24);
+    g = ((d1 & 0x00ff0000) >> 16) + ((d2 & 0x00ff0000) >> 16);
+    b = ((d1 & 0x0000ff00) >> 8) + ((d2 & 0x0000ff00) >> 8);
+    // Halve each sum, then move it back to its byte position.
+    return 0x000000ff
+         | ((Uint32)(r >> 1) << 24)
+         | ((Uint32)(g >> 1) << 16)
+         | ((Uint32)(b >> 1) << 8);
+#endif
+}
+
+
+#if defined(USE_SSE2)
+// w0, h0 = Console
+// w1, h1 = DrawMode
+/*
+ * Pick the SSE2 line scaler for drawing a w0-pixel-wide console into a
+ * w1 x h1 output. Returns the scaler as void* (cast it back to
+ * void (*)(Uint32 *, Uint8 *, int, int, int, int)).
+ *
+ * The choice is cached in pDrawFn2 and reused while the integer scale
+ * factor, w1 and h1 are unchanged. h0 does not influence the choice.
+ *
+ * The earlier unused "w1 % (w0 >> 1)" / "h1 % (h0 >> 1)" computations were
+ * dropped because they divided by zero for w0 or h0 below 2; a
+ * non-positive w0 now selects scale factor 1 instead of dividing by zero.
+ */
+static void *AGAR_SDLViewSelectScaler_Line_SSE2(int w0 ,int h0, int w1, int h1)
+{
+    void (*DrawFn)(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep) = NULL;
+    const int scale = (w0 > 0) ? (w1 / w0) : 1;
+
+    (void)h0;
+    if(__builtin_expect(((iScaleFactor == scale) && (pDrawFn2 != NULL)
+        && (w1 == iOldW) && (h1 == iOldH)), 1)) return (void *)pDrawFn2;
+    iScaleFactor = scale;
+    iOldW = w1;
+    iOldH = h1;
+    switch(scale){
+    case 0:
+        // Output narrower than the console: half-size scaler for a wide
+        // console shown very small, otherwise x1.
+        if((w0 > 480) && ((w1 < 480) || (h1 < 150))) {
+            DrawFn = pVram2RGB_x05_Line;
+        } else {
+            DrawFn = pVram2RGB_x1_Line_SSE2;
+        }
+        break;
+    case 1:
+        if(w1 > 900) {
+            DrawFn = pVram2RGB_x15_Line_SSE2; // 1.5?
+        } else if(w1 > 720) {
+            DrawFn = pVram2RGB_x125_Line_SSE2; // 1.25
+        } else {
+            DrawFn = pVram2RGB_x1_Line_SSE2; // 1.0
+        }
+        break;
+    case 2:
+        if((w1 > 720) && (w0 <= 480)) {
+            DrawFn = pVram2RGB_x25_Line_SSE2; // x2.5
+        } else if((w1 > 1360) && (w1 <= 1520)){
+            DrawFn = pVram2RGB_x225_Line_SSE2; // x2.25
+        } else if(w1 > 1700){
+            DrawFn = pVram2RGB_x3_Line_SSE2; // x3
+        } else if(w1 > 1520){
+            DrawFn = pVram2RGB_x25_Line_SSE2; // x2.5@1600
+        } else {
+            DrawFn = pVram2RGB_x2_Line_SSE2; // x2
+        }
+        break;
+    case 3:
+        DrawFn = pVram2RGB_x3_Line_SSE2; // x3
+        break;
+    case 4:
+        if((w1 > 1360) && (w1 < 1760) && (w0 <= 480)) {
+            DrawFn = pVram2RGB_x45_Line_SSE2; // 4.5
+        } else {
+            DrawFn = pVram2RGB_x4_Line_SSE2; // 4.0
+        }
+        break;
+    case 5:
+        DrawFn = pVram2RGB_x5_Line_SSE2;
+        break;
+    case 6:
+    case 7:
+    case 8:
+        DrawFn = pVram2RGB_x6_Line_SSE2;
+        break;
+    default:
+        DrawFn = pVram2RGB_x1_Line_SSE2;
+        break;
+    }
+    pDrawFn2 = (void *)DrawFn;
+    return (void *)DrawFn;
+}
+#endif // USE_SSE2
+
+
+// w0, h0 = Console
+// w1, h1 = DrawMode
+/*
+ * Pick the line scaler for drawing a w0-pixel-wide console into a w1 x h1
+ * output. Returns the scaler as void* (cast it back to
+ * void (*)(Uint32 *, Uint8 *, int, int, int, int)).
+ *
+ * When built with USE_SSE2 and the CPU information reports SSE2, the
+ * choice is delegated to the SSE2 selector. Otherwise the generic scaler
+ * is chosen here and cached in pDrawFn2 while the integer scale factor,
+ * w1 and h1 are unchanged. h0 does not influence the generic choice.
+ *
+ * The earlier unused "w1 % (w0 >> 1)" / "h1 % (h0 >> 1)" computations were
+ * dropped because they divided by zero for w0 or h0 below 2; a
+ * non-positive w0 now selects scale factor 1 instead of dividing by zero.
+ */
+static void *AGAR_SDLViewSelectScaler_Line(int w0 ,int h0, int w1, int h1)
+{
+    void (*DrawFn)(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep) = NULL;
+
+#if defined(USE_SSE2)
+    if((pCpuID != NULL) && (pCpuID->use_sse2)) {
+        return AGAR_SDLViewSelectScaler_Line_SSE2(w0, h0, w1, h1);
+    }
+#endif
+
+    const int scale = (w0 > 0) ? (w1 / w0) : 1;
+
+    (void)h0;
+    if(__builtin_expect(((iScaleFactor == scale) && (pDrawFn2 != NULL)
+        && (w1 == iOldW) && (h1 == iOldH)), 1)) return (void *)pDrawFn2;
+    iScaleFactor = scale;
+    iOldW = w1;
+    iOldH = h1;
+    switch(scale){
+    case 0:
+        // Output narrower than the console: half-size scaler for a wide
+        // console shown very small, otherwise x1.
+        if((w0 > 480) && ((w1 < 480) || (h1 < 150))) {
+            DrawFn = pVram2RGB_x05_Line;
+        } else {
+            DrawFn = pVram2RGB_x1_Line;
+        }
+        break;
+    case 1:
+        if(w1 > 900) {
+            DrawFn = pVram2RGB_x15_Line; // 1.5?
+        } else if(w1 > 720) {
+            DrawFn = pVram2RGB_x125_Line; // 1.25
+        } else {
+            DrawFn = pVram2RGB_x1_Line; // 1.0
+        }
+        break;
+    case 2:
+        if((w1 > 720) && (w0 <= 480)) {
+            DrawFn = pVram2RGB_x25_Line; // x2.5
+        } else if((w1 > 1360) && (w1 <= 1520)){
+            DrawFn = pVram2RGB_x225_Line; // x2.25
+        } else if(w1 > 1700){
+            DrawFn = pVram2RGB_x3_Line; // x3
+        } else if(w1 > 1520){
+            DrawFn = pVram2RGB_x25_Line; // x2.5
+        } else {
+            DrawFn = pVram2RGB_x2_Line; // x2
+        }
+        break;
+    case 3:
+        DrawFn = pVram2RGB_x3_Line; // x3
+        break;
+    case 4:
+        if((w1 > 1360) && (w1 < 1760) && (w0 <= 480)) {
+            DrawFn = pVram2RGB_x45_Line; // 4.5
+        } else {
+            DrawFn = pVram2RGB_x4_Line; // 4.0
+        }
+        break;
+    case 5:
+        DrawFn = pVram2RGB_x5_Line;
+        break;
+    case 6:
+    case 7:
+    case 8:
+        DrawFn = pVram2RGB_x6_Line;
+        break;
+    default:
+        DrawFn = pVram2RGB_x1_Line;
+        break;
+    }
+    pDrawFn2 = (void *)DrawFn;
+    return (void *)DrawFn;
+}
+
+
+
+/*
+ * Draw eight consecutive virtual-VRAM lines, starting at line ytop, for the
+ * source columns [xbegin, xend) onto surface su using scaler fn.
+ * h is the height used for vertical scaling and hh the virtual screen
+ * height; each source line is mapped to destination row (h * line) / hh and
+ * repeated so that it reaches the row of the next line (at least once).
+ */
+static void UpdateSrc_DrawRun(void (*fn)(Uint32 *, Uint8 *, int , int , int, int),
+                              Uint32 *src, AG_Surface *su, int h, int hh,
+                              int xbegin, int xend, int ytop)
+{
+    int line;
+
+    for(line = ytop; line < (ytop + 8); line++) {
+        int ydst = (h * line) / hh;
+        int rep = ((h * (line + 1)) / hh) - ydst;
+        Uint8 *dst = (Uint8 *)(su->pixels) + su->pitch * ydst;
+
+        if(__builtin_expect((rep < 1), 0)) rep = 1;
+        fn(src, dst, xbegin, xend, line, rep);
+    }
+}
+
+/*
+ * Event handler: convert the virtual VRAM (pVram2) into the draw surface.
+ *
+ * - When crt_flag is FALSE the widget's source surface is filled with
+ *   opaque black and the view is marked dirty.
+ * - In RENDERING_RASTER mode every line flagged in bDrawLine[] is drawn.
+ * - Otherwise the 8x8 cells flagged in SDLDrawFlag.write[][] are drawn,
+ *   merging horizontally adjacent flagged cells into a single scaler call.
+ *
+ * The view is marked dirty only when something was actually drawn.
+ */
+void AGAR_SDLViewUpdateSrc(AG_Event *event)
+{
+    AGAR_SDLView *my = (AGAR_SDLView *)AG_SELF();
+    void *Fn = NULL;
+    void (*DrawFn2)(Uint32 *, Uint8 *, int , int , int, int);
+    AG_Surface *Surface;
+    Uint32 *src;
+    Uint8 *dst;
+    int yrep2;
+    int y2, y3;
+    int w;
+    int h;
+    int ww;
+    int hh;
+    int xx;
+    int yy;
+    int lcount;
+    int xcache;
+    BOOL flag = FALSE;
+
+    Fn = AG_PTR(1);
+    if(my == NULL) return;
+    Surface = AGAR_SDLViewGetSrcSurface(my);
+
+    if(Surface == NULL) return;
+    DrawSurface = Surface;
+    w = Surface->w;
+    h = Surface->h;
+
+    if(pVram2 == NULL) return;
+    if(__builtin_expect((crt_flag == FALSE), 0)) {
+        AG_Color cc;
+
+        cc.r = 0x00;
+        cc.g = 0x00;
+        cc.b = 0x00;
+        cc.a = 0xff;
+
+        LockVram();
+        AG_SurfaceLock(Surface);
+        AG_FillRect(Surface, NULL, cc);
+        /* Release the surface again; the lock used to be left held here. */
+        AG_SurfaceUnlock(Surface);
+        AGAR_SDLViewSetDirty(my);
+        UnlockVram();
+        return;
+    }
+
+    /* Size of the virtual screen for the current display mode. */
+    switch(bMode){
+    case SCR_200LINE:
+        ww = 640;
+        hh = 200;
+        break;
+    case SCR_400LINE:
+        ww = 640;
+        hh = 400;
+        break;
+    default:
+        ww = 320;
+        hh = 200;
+        break;
+    }
+    Fn = XM7_SDLViewSelectScaler_Line(ww , hh, w, h);
+    if(__builtin_expect((Fn == NULL), 0)) return;
+    DrawFn2 = (void (*)(Uint32 *, Uint8 *, int , int , int, int))Fn;
+
+    src = pVram2;
+    LockVram();
+    AG_ObjectLock(AGOBJECT(my));
+
+    if(nRenderMethod == RENDERING_RASTER) {
+        if(my->forceredraw != 0){
+            for(yy = 0; yy < hh; yy++) {
+                bDrawLine[yy] = TRUE;
+            }
+            my->forceredraw = 0;
+        }
+        Surface = GetDrawSurface();
+        if(Surface == NULL) goto _end1;
+        AG_SurfaceLock(Surface);
+        /* yrep2 is per-iteration scratch, so it must be private as well. */
+#ifdef _OPENMP
+#pragma omp parallel for shared(hh, bDrawLine, ww, src, Surface, flag) private(dst, y2, y3, yrep2)
+#endif
+        for(yy = 0 ; yy < hh; yy++) {
+            /*
+             * Virtual VRAM -> Real Surface:
+             */
+            if(__builtin_expect((bDrawLine[yy] == TRUE), 0)) {
+                y2 = (h * yy ) / hh;
+                y3 = (h * (yy + 1)) / hh;
+                dst = (Uint8 *)(Surface->pixels) + Surface->pitch * y2;
+                yrep2 = y3 - y2;
+                if(__builtin_expect((yrep2 < 1), 0)) yrep2 = 1;
+                DrawFn2(src, dst, 0, ww, yy, yrep2);
+                bDrawLine[yy] = FALSE;
+                flag = TRUE;
+            }
+        }
+        AG_SurfaceUnlock(Surface);
+        goto _end1;
+    } else { // Block
+        /*
+         * NOTE(review): forceredraw is not cleared on this path, so every
+         * call keeps re-flagging all cells -- confirm that this is intended.
+         */
+        if(my->forceredraw != 0){
+            for(yy = 0; yy < (hh >> 3); yy++) {
+                for(xx = 0; xx < (ww >> 3); xx++ ){
+                    SDLDrawFlag.write[xx][yy] = TRUE;
+                }
+            }
+        }
+    }
+
+    /*
+     * Below is BLOCK or FULL.
+     * Not used by line rendering.
+     */
+    Surface = GetDrawSurface();
+    if(Surface == NULL) goto _end1;
+    AG_SurfaceLock(Surface);
+
+#ifdef _OPENMP
+# pragma omp parallel for shared(SDLDrawFlag, ww, hh, src, Surface, flag) private(xx, lcount, xcache)
+#endif
+    for(yy = 0 ; yy < hh; yy += 8) {
+        lcount = 0; /* width (in source pixels) of the pending run of flagged cells */
+        xcache = 0; /* source column where the pending run starts */
+        for(xx = 0; xx < ww; xx += 8) {
+            if(__builtin_expect((SDLDrawFlag.write[xx >> 3][yy >> 3] != FALSE), 1)) {
+                /* Extend the current run and consume the flag. */
+                lcount += 8;
+                SDLDrawFlag.write[xx >> 3][yy >> 3] = FALSE;
+            } else {
+                /* Clean cell: flush the run collected so far, restart after it. */
+                if(__builtin_expect((lcount != 0), 1)) {
+                    UpdateSrc_DrawRun(DrawFn2, src, Surface, h, hh,
+                                      xcache, xcache + lcount, yy);
+                    flag = TRUE;
+                }
+                xcache = xx + 8;
+                lcount = 0;
+            }
+        }
+        /* Flush a run that reaches the right edge. */
+        if(__builtin_expect((lcount != 0), 1)) {
+            UpdateSrc_DrawRun(DrawFn2, src, Surface, h, hh,
+                              xcache, xcache + lcount, yy);
+            flag = TRUE;
+        }
+    }
+    AG_SurfaceUnlock(Surface);
+
+_end1:
+    AG_ObjectUnlock(AGOBJECT(my));
+    if(flag != FALSE) XM7_SDLViewSetDirty(my);
+    UnlockVram();
+    return;
+}
--- /dev/null
+/*
+* FM-7 Emulator "XM7"
+* Virtual Vram Display(Agar widget version)
+* (C) 2012 K.Ohta <whatisthis.sowhat@gmail.com>
+* License: GPL2
+* History:
+* Jan 18,2012 From demos/customwidget/mywidget.[c|h]
+* Dec 30,2014 Move from XM7/SDL, these codes are complete? original.
+* Created from libAgar's template.
+*
+*/
+/*
+ * Implementation of a typical Agar widget which uses surface mappings to
+ * efficiently draw surfaces, regardless of the underlying graphics system.
+ *
+ * If you are not familiar with the way the Agar object system handles
+ * inheritance, see demos/objsystem.
+ */
+
+#ifdef _WINDOWS
+//#define _OFF_T_
+#endif
+
+#include "agar_sdlview.h"
+#include "cache_wrapper.h"
+#include <SDL/SDL.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif // _OPENMP
+
+/*
+ * This is a generic constructor function. It is completely optional, but
+ * customary of FooNew() functions to allocate, initialize and attach an
+ * instance of the class.
+ */
+/*
+ * Event handler: re-map the widget's current surface and mark the view
+ * dirty.  When no surface is linked yet (mySurface < 0) there is nothing to
+ * map, and looking the handle up would index before the surface table, so
+ * only the dirty flag is raised in that case.
+ * NOTE(review): the handle returned by AG_WidgetMapSurfaceNODUP() is
+ * discarded here, as in the original code -- confirm that this is intended.
+ */
+static void ForceRedrawFn(AG_Event *event)
+{
+    AGAR_SDLView *my = (AGAR_SDLView *)AG_SELF();
+
+    if(my == NULL) return;
+    if(my->mySurface >= 0) {
+        AG_WidgetMapSurfaceNODUP(my, AGWIDGET_SURFACE(my, my->mySurface));
+    }
+    AGAR_SDLViewSetDirty(my);
+}
+
+
+/*
+ * Allocate, initialize and attach a new AGAR_SDLView.
+ *
+ *   parent : object to attach the widget to.
+ *   src    : surface to link as the view's current surface, or NULL to
+ *            start without one (mySurface = -1).
+ *   param  : stored as-is in the instance (the string is not copied).
+ *
+ * Returns the new widget, or NULL when the allocation fails.
+ */
+AGAR_SDLView *AGAR_SDLViewNew(void *parent, AG_Surface *src, const char *param)
+{
+    AGAR_SDLView *my;
+
+    /* Create a new instance of the AGAR_SDLView class */
+    my = malloc(sizeof(AGAR_SDLView));
+    if(my == NULL) return NULL;
+    AG_ObjectInit(my, &AGAR_SDLViewClass);
+
+    /* Set some constructor arguments */
+    my->param = param;
+    my->draw_ev = NULL;
+
+    my->forceredraw = 1;
+    /* Draw() reads this flag, so it must not be left as malloc() garbage. */
+    my->dirty = 1;
+    /* Attach the object to the parent */
+    AG_ObjectAttach(parent, my);
+    AG_ObjectLock(my);
+    if(__builtin_expect((src != NULL), 1)) {
+        my->mySurface = AGAR_SDLViewLinkSurface(my, src);
+    } else {
+        my->mySurface = -1;
+    }
+    AG_ObjectUnlock(my);
+    return (my);
+}
+
+
+
+/*
+ * Map surface src onto the view without duplicating it and remember the
+ * resulting handle as the view's current surface.
+ * Returns the handle obtained from AG_WidgetMapSurfaceNODUP().
+ */
+int AGAR_SDLViewLinkSurface(void *p, AG_Surface *src)
+{
+    AGAR_SDLView *view = p;
+    const int handle = AG_WidgetMapSurfaceNODUP(view, src);
+
+    view->mySurface = handle;
+    return handle;
+}
+
+/*
+ * Create a new w x h packed 32-bit surface, link it to the view as its
+ * current surface and request a full redraw.
+ * Returns the view's surface handle after linking.
+ */
+int AGAR_SDLViewSurfaceNew(void *p, int w, int h)
+{
+    AGAR_SDLView *view = p;
+    AG_Surface *created;
+    AG_PixelFormat pf;
+
+    /* 32 bits per pixel, no palette, no precision loss on any channel. */
+    pf.palette = NULL;
+    pf.alpha = 255;
+    pf.BytesPerPixel = 4;
+    pf.BitsPerPixel = 32;
+    pf.Rloss = 0;
+    pf.Gloss = 0;
+    pf.Bloss = 0;
+    pf.Aloss = 0;
+    pf.Rshift = 0;
+    pf.Gshift = 8;
+    pf.Bshift = 16;
+    pf.Ashift = 24;
+
+    /* Channel masks depend on whether AG_BIG_ENDIAN is defined. */
+#ifdef AG_BIG_ENDIAN
+    pf.Amask = 0xff000000; // A
+    pf.Bmask = 0x00ff0000; // B
+    pf.Gmask = 0x0000ff00; // G
+    pf.Rmask = 0x000000ff; // R
+#else
+    pf.Amask = 0x000000ff; // A
+    pf.Bmask = 0xff000000; // B
+    pf.Gmask = 0x0000ff00; // G
+    pf.Rmask = 0x00ff0000; // R
+#endif
+
+    created = AG_SurfaceNew(AG_SURFACE_PACKED , w, h, &pf, 0);
+
+    AG_ObjectLock(view);
+    AGAR_SDLViewLinkSurface(view, created);
+    view->forceredraw = 1;
+    AG_ObjectUnlock(view);
+
+    return view->mySurface;
+}
+
+
+/*
+ * Unmap the view's current surface (if one is linked) and mark the view as
+ * having no surface.  Prints a trace line when done.
+ */
+void AGAR_SDLViewSurfaceDetach(void *p)
+{
+    AGAR_SDLView *view = p;
+
+    AG_ObjectLock(view);
+    if(view->mySurface >= 0) AG_WidgetUnmapSurface(view, view->mySurface);
+    view->mySurface = -1;
+    AG_ObjectUnlock(view);
+
+    printf("AGAR_SDLViewSurfaceDetach()\n");
+}
+
+/*
+ * Return the widget surface mapped under handle num, or NULL when the view
+ * is NULL or num is not a valid index into the widget's surface table
+ * (valid handles are 0 .. nsurfaces - 1).
+ */
+AG_Surface *AGAR_SDLViewGetSurface(void *p, int num)
+{
+    AGAR_SDLView *my = p;
+
+    if(my == NULL) return NULL;
+    if(num < 0) return NULL;
+    /* Strictly less than: surfaces[nsurfaces] is one past the last entry. */
+    if(num >= (int)my->_inherit.nsurfaces) return NULL;
+    return my->_inherit.surfaces[num];
+}
+
+/*
+ * Return the surface currently selected by the view (handle mySurface),
+ * or NULL when the view is NULL or the handle resolves to nothing.
+ */
+AG_Surface *AGAR_SDLViewGetSrcSurface(void *p)
+{
+    AGAR_SDLView *view = p;
+
+    return (view != NULL) ? AGAR_SDLViewGetSurface(view, view->mySurface) : NULL;
+}
+
+/*
+ * Select which mapped surface the view uses.  Values below -1 are ignored;
+ * otherwise the handle is stored only if it is below the widget's surface
+ * count.
+ */
+void AGAR_SDLViewSetSurfaceNum(void *p, int num)
+{
+    AGAR_SDLView *view = p;
+
+    if(num >= -1) {
+        AG_ObjectLock(AGOBJECT(view));
+        if(num < view->_inherit.nsurfaces) {
+            view->mySurface = num;
+        }
+        AG_ObjectUnlock(AGOBJECT(view));
+    }
+}
+
+
+
+
+// Register the draw function: fn is stored as an unnamed event (draw_ev)
+// on the widget, and Draw() in this file invokes its handler.
+// fmt and the variadic arguments are handed to AG_EVENT_GET_ARGS().
+void AGAR_SDLViewDrawFn(void *p, AG_EventFn fn, const char *fmt, ...)
+{
+ /*
+ * Function must be void foo(AG_Event *) .
+ */
+ AGAR_SDLView *my = p;
+
+ AG_ObjectLock(my);
+ /* Create the event first, then attach the caller's arguments to it. */
+ my->draw_ev = AG_SetEvent(my, NULL, fn , NULL);
+ AG_EVENT_GET_ARGS(my->draw_ev, fmt);
+ AG_ObjectUnlock(my);
+
+}
+
+/* Flag the view as dirty (under the object lock) so Draw() blits it again. */
+void AGAR_SDLViewSetDirty(void *p)
+{
+    AGAR_SDLView *view = p;
+
+    AG_ObjectLock(view);
+    view->dirty = 1;
+    AG_ObjectUnlock(view);
+}
+
+/*
+ * This function requests a minimal geometry for displaying the widget.
+ * It is expected to return the width and height in pixels into r.
+ *
+ * Without a linked surface the widget's own geometry is reported and a
+ * surface of that size is created.  With a surface, its size is reported
+ * with the width rounded down to a multiple of 8 pixels, and the surface
+ * is resized to match.
+ */
+static void SizeRequest(void *p, AG_SizeReq *r)
+{
+    AGAR_SDLView *my = p;
+    AG_Surface *su;
+
+    AG_ObjectLock(my);
+    if (my->mySurface == -1) {
+        r->w = my->_inherit.w;
+        /* Height comes from the widget height (this used to copy the width). */
+        r->h = my->_inherit.h;
+        AGAR_SDLViewSurfaceNew(my, r->w, r->h);
+    } else {
+        su = AGWIDGET_SURFACE(my, my->mySurface);
+        if(su != NULL) {
+            /* Set boundary as 32(bytes) = 8(dwords) : 256bit. */
+            r->w = (su->w / 8) * 8;
+            r->h = su->h;
+            AG_SurfaceResize(su, r->w, r->h);
+        } else {
+            /* Handle with no surface behind it: fall back to widget geometry. */
+            r->w = my->_inherit.w;
+            r->h = my->_inherit.h;
+        }
+    }
+    AG_ObjectUnlock(my);
+}
+
+/*
+ * This function is called by the parent widget after it decided how much
+ * space to allocate to this widget.
+ *
+ * The view's surface is created if missing, resized to the allocation if
+ * its size differs, cleared to opaque black, and a full redraw is
+ * requested.  Returns 0 on success; -1 (Draw() will not be called) when
+ * the allocation is smaller than 5x5 or the surface cannot be created or
+ * resized.
+ */
+static void Draw(void *p);
+
+static int SizeAllocate(void *p, const AG_SizeAlloc *a)
+{
+    AGAR_SDLView *my = p;
+    AG_Surface *su = NULL;
+    AG_Rect r;
+    AG_Color c;
+
+    if(my == NULL) return -1;
+    /* If we return -1, Draw() will not be called. */
+    if (a->w < 5 || a->h < 5)
+        return (-1);
+
+    AG_ObjectLock(my);
+    /* Only look the handle up when there is one; -1 means "no surface". */
+    if(my->mySurface >= 0) {
+        su = AGWIDGET_SURFACE(my, my->mySurface);
+    }
+    if(su == NULL) {
+        my->mySurface = AGAR_SDLViewSurfaceNew(my, a->w, a->h);
+        if(my->mySurface >= 0) {
+            su = AGWIDGET_SURFACE(my, my->mySurface);
+        }
+        if(su == NULL) {
+            AG_ObjectUnlock(my);
+            return (-1);
+        }
+    }
+
+    if((su->w != a->w) || (su->h != a->h)) {
+        if(AG_SurfaceResize(su, a->w, a->h) < 0) {
+            AG_ObjectUnlock(my);
+            return (-1);
+        }
+    }
+
+    my->forceredraw = 1;
+    my->dirty = 1;
+    // Clear
+    r.x = 0;
+    r.y = 0;
+    r.w = a->w;
+    r.h = a->h;
+    c.a = 255;
+    c.r = 0;
+    c.g = 0;
+    c.b = 0;
+    AG_FillRect(su, &r, c);
+    AG_WidgetSetPosition(AGWIDGET(my), a->x, a->y);
+    AG_ObjectUnlock(my);
+    return (0);
+}
+
+/*
+ * Draw function, invoked from the GUI rendering context.
+ *
+ * Runs the registered draw event handler (if any), then, when the view is
+ * dirty or a forced redraw is pending, blits the current surface at [0,0]
+ * and clears the dirty flag.  In GL mode the surface is mapped again
+ * before the blit.
+ * NOTE(review): each AG_WidgetMapSurfaceNODUP() call returns a handle that
+ * is dropped here, and forceredraw is not cleared by this function --
+ * confirm both are intended.
+ */
+static void Draw(void *p)
+{
+    AGAR_SDLView *view = p;
+
+    AG_ObjectLock(view);
+
+    if(view->draw_ev != NULL){
+        view->draw_ev->handler(view->draw_ev);
+    }
+
+    /* Nothing changed since the last blit: done. */
+    if((view->dirty == 0) && (view->forceredraw == 0)) {
+        AG_ObjectUnlock(view);
+        return;
+    }
+
+    if(view->mySurface >= 0) {
+        if(AG_UsingGL(NULL) != 0) {
+            AG_WidgetMapSurfaceNODUP(view, AGWIDGET_SURFACE(view, view->mySurface));
+        }
+        /* Blit the mapped surface at [0,0]. */
+        AG_WidgetBlitSurface(view, view->mySurface, 0, 0);
+    }
+    view->dirty = 0;
+
+    AG_ObjectUnlock(view);
+}
+
+
+
+/* Mouse motion event handler: fetches its arguments, otherwise a placeholder. */
+static void MouseMotion(AG_Event *event)
+{
+    AGAR_SDLView *view = AG_SELF();
+    const int mouse_x = AG_INT(1);
+    const int mouse_y = AG_INT(2);
+
+    /* Nothing to do yet. */
+    (void)view;
+    (void)mouse_x;
+    (void)mouse_y;
+}
+
+/* Mouse press event handler: a left click is logged and focuses the widget. */
+static void MouseButtonDown(AG_Event *event)
+{
+    AGAR_SDLView *view = AG_SELF();
+    const int which = AG_INT(1);
+    const int click_x = AG_INT(2);
+    const int click_y = AG_INT(3);
+
+    if (which == AG_MOUSE_LEFT) {
+        printf("Click at %d,%d\n", click_x, click_y);
+        AG_WidgetFocus(view);
+    }
+}
+
+/* Mouse release event handler: fetches its arguments, otherwise a placeholder. */
+static void MouseButtonUp(AG_Event *event)
+{
+    AGAR_SDLView *view = AG_SELF();
+    const int which = AG_INT(1);
+    const int release_x = AG_INT(2);
+    const int release_y = AG_INT(3);
+
+    /* Nothing to do yet. */
+    (void)view;
+    (void)which;
+    (void)release_x;
+    (void)release_y;
+}
+
+/* Key press event handler: fetches the key symbol, otherwise a placeholder. */
+static void KeyDown(AG_Event *event)
+{
+    AGAR_SDLView *view = AG_SELF();
+    const int sym = AG_INT(1);
+
+    /* Nothing to do yet. */
+    (void)view;
+    (void)sym;
+}
+
+/* Key release event handler: fetches the key symbol, otherwise a placeholder. */
+static void KeyUp(AG_Event *event)
+{
+    AGAR_SDLView *view = AG_SELF();
+    const int sym = AG_INT(1);
+
+    /* Nothing to do yet. */
+    (void)view;
+    (void)sym;
+}
+
+/*
+ * Initialization routine. Note that the object system will automatically
+ * invoke the initialization routines of the parent classes first.
+ *
+ * Every instance field is given a defined value here, so that an instance
+ * not created through AGAR_SDLViewNew() does not carry uninitialized
+ * draw_ev / forceredraw / dirty members into Draw().
+ */
+static void Init(void *obj)
+{
+    AGAR_SDLView *my = obj;
+
+    /* Allow this widget to grab focus. */
+    AGWIDGET(my)->flags |= AG_WIDGET_FOCUSABLE;
+
+    /* Initialize instance variables. */
+    my->param = "";
+    my->draw_ev = NULL;
+    my->forceredraw = 1;
+    my->dirty = 1;
+
+    /*
+     * We'll eventually need to create and map a surface, but we cannot
+     * do this from Init(), because it involves texture operations in
+     * GL mode which are thread-unsafe. Until then there is no surface.
+     */
+    my->mySurface = -1;
+
+    /*
+     * Map our event handlers. For a list of all meaningful events
+     * we can handle, see AG_Object(3), AG_Widget(3) and AG_Window(3).
+     */
+    AG_SetEvent(my, "mouse-button-up", MouseButtonUp, NULL);
+    AG_SetEvent(my, "mouse-button-down", MouseButtonDown, NULL);
+    AG_SetEvent(my, "mouse-motion", MouseMotion, NULL);
+    AG_SetEvent(my, "key-up", KeyUp, NULL);
+    AG_SetEvent(my, "key-down", KeyDown, NULL);
+}
+
+/*
+ * Unmap every surface of the widget, from the highest handle down to 0,
+ * and mark the view as having no current surface.
+ */
+static void Detach(void *obj)
+{
+    AGAR_SDLView *view = obj;
+    int handle;
+
+    if(view == NULL) return;
+
+    AG_ObjectLock(view);
+    handle = (view->_inherit.nsurfaces - 1);
+    while(handle >= 0) {
+        AG_WidgetUnmapSurface(view, handle);
+        handle--;
+    }
+    view->mySurface = -1;
+    AG_ObjectUnlock(view);
+}
+/*
+ * This structure describes our widget class. It inherits from AG_ObjectClass.
+ * Any of the function members may be NULL. See AG_Widget(3) for details.
+ * The inner braces fill the object-class part; Draw, SizeRequest and
+ * SizeAllocate are the widget operations defined in this file.
+ */
+AG_WidgetClass AGAR_SDLViewClass = {
+ {
+ "AG_Widget:AGAR_SDLView", /* Name of class */
+ sizeof(AGAR_SDLView), /* Size of structure */
+ { 0,0 }, /* Version for load/save */
+ Init, /* Initialize dataset */
+ Detach, /* Free dataset (unmaps all surfaces of the widget) */
+ NULL, /* Destroy widget */
+ NULL, /* Load widget (for GUI builder) */
+ NULL, /* Save widget (for GUI builder) */
+ NULL /* Edit (for GUI builder) */
+ },
+ Draw, /* Render widget */
+ SizeRequest, /* Default size requisition */
+ SizeAllocate /* Size allocation callback */
+};
--- /dev/null
+/*
+* FM-7 Emulator "XM7"
+* Virtual Vram Display(Agar widget version)
+* (C) 2012 K.Ohta <whatisthis.sowhat@gmail.com>
+* License: CC-BY-SA
+* History:
+* Jan 18,2012 From demos/customwidget/mywidget.[c|h]
+*
+*/
+#ifndef __AGAR_SDL_VIEW
+#define __AGAR_SDL_VIEW
+
+# ifdef __cplusplus
+extern "C" {
+#endif
+
+//#include <sys/types.h>
+//#include <agar/core/string_compat.h>
+#include <agar/core.h>
+//#include <agar/core/types.h>
+#include <agar/gui.h>
+
+/*
+* Compatibility aliases for the widget code: each unprefixed string helper
+* name below expands to the AG_-prefixed function of the same name.
+*/
+#define Strlcat AG_Strlcat
+#define Strlcpy AG_Strlcpy
+#define Strsep AG_Strsep
+#define Strdup AG_Strdup
+#define TryStrdup AG_TryStrdup
+#define Strcasecmp AG_Strcasecmp
+#define Strncasecmp AG_Strncasecmp
+#define Strcasestr AG_Strcasestr
+#define StrlcatUCS4 AG_StrlcatUCS4
+#define StrlcpyUCS4 AG_StrlcpyUCS4
+#define StrsepUCS4 AG_StrsepUCS4
+#define StrdupUCS4 AG_StrdupUCS4
+#define TryStrdupUCS4 AG_TryStrdupUCS4
+#define StrReverse AG_StrReverse
+#define StrlcpyInt AG_StrlcpyInt
+#define StrlcatInt AG_StrlcatInt
+#define StrlcpyUint AG_StrlcpyUint
+#define StrlcatUint AG_StrlcatUint
+
+
+/*
+ * Structure describing an instance of the AGAR_SDLView.
+ * The widget base must stay the first member (the class inherits from
+ * AG_Widget).
+ */
+typedef struct AGAR_SDLView {
+ struct ag_widget _inherit; /* Inherit from AG_Widget */
+ int mySurface; /* Surface handle : CURRENT (-1 when no surface is linked) */
+ AG_Event *draw_ev; // draw handler event, set by AGAR_SDLViewDrawFn()
+ int forceredraw; /* Non-zero: a complete redraw has been requested */
+ int dirty; /* Non-zero: set by AGAR_SDLViewSetDirty() */
+ const char *param; /* Some parameter */
+} AGAR_SDLView;
+
+/* Widget class descriptor and constructor (parent, initial surface, param). */
+extern AG_WidgetClass AGAR_SDLViewClass;
+extern AGAR_SDLView *AGAR_SDLViewNew(void *, AG_Surface *, const char *);
+
+/* Surface management: p is the AGAR_SDLView; int results are surface handles. */
+extern int AGAR_SDLViewLinkSurface(void *p, AG_Surface *src);
+extern int AGAR_SDLViewSurfaceNew(void *p, int w, int h);
+extern void AGAR_SDLViewSurfaceDetach(void *p);
+extern AG_Surface *AGAR_SDLViewGetSurface(void *p, int index);
+extern AG_Surface *AGAR_SDLViewGetSrcSurface(void *p);
+extern void AGAR_SDLViewSetSurfaceNum(void *p, int num);
+
+/* Draw callback registration (fn must be void fn(AG_Event *)) and dirty flag. */
+extern void AGAR_SDLViewDrawFn(void *p, AG_EventFn fn, const char *fmt, ...);
+extern void AGAR_SDLViewSetDirty(void *p);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __AGAR_SDL_VIEW */
--- /dev/null
+# Generic (plain C) line scalers, one source file per zoom factor.
+message("* ui-agar/scaler/generic")
+
+# No SIMD compiler flags are set for this library (the line below is disabled).
+#set(CMAKE_BUILD_SETTING_C_FLAGS "${CMAKE_C_FLAGS} -msse2 -msse -mmmx")
+add_library(xm7_scaler-generic
+ scaler_x05.c
+ scaler_x1.c
+ scaler_x125.c
+ scaler_x15.c
+ scaler_x2.c
+ scaler_x225.c
+ scaler_x25.c
+ scaler_x3.c
+ scaler_x4.c
+ scaler_x45.c
+ scaler_x5.c
+ scaler_x6.c
+)
--- /dev/null
+/*
+ * Zoom x0.5
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-01-26 Move from agar_sdlscaler.cpp
+ */
+
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+
+/*
+ * Zoom x0.5: draw source line y, columns [xbegin, xend), at half width.
+ *
+ *   src  : virtual VRAM, 640 pixels per line.
+ *   dst  : start of the destination row inside the draw surface.
+ *   yrep : number of destination rows to fill (values <= 0 count as 1).
+ *
+ * Each output pixel is the per-channel average of two neighbouring source
+ * pixels, with alpha forced to opaque.  Output is produced four pixels at a
+ * time from eight source pixels; a run that ends close to the right edge of
+ * the surface, and any remainder of fewer than four pixels, is produced one
+ * pixel at a time.
+ *
+ * The vector loop used to count single output pixels while consuming eight
+ * source / four destination pixels per pass, so it ran four times too far.
+ */
+void pVram2RGB_x05_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+#if AG_BIG_ENDIAN != 1
+    const Uint32 rmask = 0x000000ff;
+    const Uint32 gmask = 0x0000ff00;
+    const Uint32 bmask = 0x00ff0000;
+    const Uint32 amask = 0xff000000;
+#else
+    const Uint32 rmask = 0xff000000;
+    const Uint32 gmask = 0x00ff0000;
+    const Uint32 bmask = 0x0000ff00;
+    const Uint32 amask = 0x000000ff;
+#endif
+    AG_Surface *Surface = GetDrawSurface();
+    v8hi_t rmask1, gmask1, bmask1;
+    v4hi rmask2, gmask2, bmask2, amask2;
+    Uint32 *d1;
+    Uint32 *d2;
+    Uint32 *p;
+    int w;
+    int xx;
+    int ww;
+    int i;
+    int pitch;
+    int yrep2 = yrep;
+
+    if(Surface == NULL) return;
+    w = Surface->w;
+    pitch = Surface->pitch / sizeof(Uint32);
+    if(yrep2 <= 0) yrep2 = 1; // Okay?
+
+    for(i = 0; i < 8; i++) {
+        rmask1.i[i] = rmask;
+        gmask1.i[i] = gmask;
+        bmask1.i[i] = bmask;
+    }
+    for(i = 0; i < 4; i++) {
+        rmask2.i[i] = rmask;
+        gmask2.i[i] = gmask;
+        bmask2.i[i] = bmask;
+        amask2.i[i] = amask;
+    }
+
+    d1 = (Uint32 *)(dst + (xbegin >> 1) * Surface->format->BytesPerPixel);
+    p = &src[xbegin + y * 640];
+
+    /* Output pixels to produce, limited to what is left of the row. */
+    ww = (xend - xbegin) / 2;
+    if(ww > (w - (xbegin >> 1))) ww = w - (xbegin >> 1);
+    if(ww <= 0) return;
+
+    xx = 0;
+    if(((xbegin >> 1) + 4) < w) {
+        /* Groups of 8 source pixels -> 4 destination pixels. */
+        v4hi *pd;
+        v4hi cr, cg, cb, cd;
+        v8hi_t *b;
+        v8hi_t br, bg, bb;
+
+        for(; (xx + 4) <= ww; xx += 4) {
+            b = (v8hi_t *)p;
+            br.v = b->v & rmask1.v;
+            bg.v = b->v & gmask1.v;
+            bb.v = b->v & bmask1.v;
+            cr.i[0] = (br.i[0] >> 1) + (br.i[1] >> 1);
+            cr.i[1] = (br.i[2] >> 1) + (br.i[3] >> 1);
+            cr.i[2] = (br.i[4] >> 1) + (br.i[5] >> 1);
+            cr.i[3] = (br.i[6] >> 1) + (br.i[7] >> 1);
+
+            cb.i[0] = (bb.i[0] + bb.i[1]) >> 1;
+            cb.i[1] = (bb.i[2] + bb.i[3]) >> 1;
+            cb.i[2] = (bb.i[4] + bb.i[5]) >> 1;
+            cb.i[3] = (bb.i[6] + bb.i[7]) >> 1;
+
+            cg.i[0] = (bg.i[0] + bg.i[1]) >> 1;
+            cg.i[1] = (bg.i[2] + bg.i[3]) >> 1;
+            cg.i[2] = (bg.i[4] + bg.i[5]) >> 1;
+            cg.i[3] = (bg.i[6] + bg.i[7]) >> 1;
+            cr.v = cr.v & rmask2.v;
+            cg.v = cg.v & gmask2.v;
+            cb.v = cb.v & bmask2.v;
+            cd.v = cr.v | cg.v | cb.v | amask2.v;
+
+            d2 = &d1[xx];
+            for(i = 0; i < yrep2; i++) {
+                pd = (v4hi *)d2;
+                *pd = cd;
+                d2 += pitch;
+            }
+            p += 8;
+        }
+    }
+
+    /* Near the right edge, and for the last 1..3 pixels: one at a time. */
+    for(; xx < ww; xx++) {
+        Uint32 bd1 = p[0];
+        Uint32 bd2 = p[1];
+        Uint32 r = (((bd1 & rmask) >> 1) + ((bd2 & rmask) >> 1)) & rmask;
+        Uint32 g = (((bd1 & gmask) >> 1) + ((bd2 & gmask) >> 1)) & gmask;
+        Uint32 b = (((bd1 & bmask) >> 1) + ((bd2 & bmask) >> 1)) & bmask;
+
+        d2 = &d1[xx];
+        for(i = 0; i < yrep2; i++) {
+            *d2 = r | g | b | amask;
+            d2 += pitch;
+        }
+        p += 2;
+    }
+}
+
+
+
+
+
+
--- /dev/null
+/*
+ * Zoom x1x1
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-01-26 Move from agar_sdlscaler.cpp
+ */
+
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+/*
+ * Zoom x1: copy source line y, columns [xbegin, xend), to the destination
+ * row dst, eight pixels per step, filling yrep rows (values <= 0 count as 1).
+ * With more than one row and bFullScan off, the rows from yrep / 2 onwards
+ * are written as opaque black instead of pixel data.
+ */
+void pVram2RGB_x1_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *Surface = GetDrawSurface();
+    const int width = xend - xbegin;
+    int rows = yrep;
+    int done;
+    int line;
+    unsigned stride;
+    Uint32 *out;
+    Uint32 *column;
+    v4hi *in;
+    v4hi *cell;
+    v4hi pix_lo;
+    v4hi pix_hi;
+    v4hi blank;
+
+    if(Surface == NULL) return;
+    if(width <= 0) return;
+
+    /* Opaque black, used for the blanked scan lines. */
+#if AG_BIG_ENDIAN != 1
+    blank.i[0] = blank.i[1] = blank.i[2] = blank.i[3] = 0xff000000;
+#else
+    blank.i[0] = blank.i[1] = blank.i[2] = blank.i[3] = 0x000000ff;
+#endif
+
+    out = (Uint32 *)(dst + xbegin * Surface->format->BytesPerPixel);
+    in = (v4hi *)&src[xbegin + y * 640];
+    stride = Surface->pitch / sizeof(Uint32);
+    if(rows <= 0) rows = 1;
+
+    // Not thinking align ;-(
+    if(rows == 1) {
+        /* Single row: straight copy. */
+        for(done = 0; done < width; done += 8, out += 8, in += 2) {
+            cell = (v4hi *)out;
+            cell[0] = in[0];
+            cell[1] = in[1];
+        }
+        return;
+    }
+
+    for(done = 0; done < width; done += 8, out += 8, in += 2) {
+        pix_lo = in[0];
+        pix_hi = in[1];
+        column = out;
+        for(line = 0; line < rows; line++, column += stride) {
+            cell = (v4hi *)column;
+            if(bFullScan || (line < (rows >> 1))) {
+                cell[0] = pix_lo;
+                cell[1] = pix_hi;
+            } else {
+                cell[0] = blank;
+                cell[1] = blank;
+            }
+        }
+    }
+}
+
--- /dev/null
+/*
+ * Zoom x1.25x2 i.e. 800x480.
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+
+/*
+ * Expand ww source pixels (processed in groups of 8) to 10 destination
+ * pixels per group by doubling the first and the fifth pixel of each group,
+ * and repeat the result over `repeat` rows of `pitch` bytes.
+ * When bFullScan is off and repeat >= 2, the last row is written as opaque
+ * black instead of pixel data.
+ */
+static void Scaler_DrawLine(Uint32 *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const Uint32 bb = 0xff000000;
+#else
+    const Uint32 bb = 0x000000ff;
+#endif
+    v4hi *in = (v4hi *)src;
+    v4hi lo, hi;
+    Uint32 px[10];
+    Uint32 *row;
+    int stride;
+    int data_rows;
+    int blank_last;
+    int done;
+    int line;
+    int k;
+
+    if(repeat <= 0) return;
+
+    stride = pitch / sizeof(Uint32);
+    blank_last = !((bFullScan) || (repeat < 2));
+    data_rows = blank_last ? (repeat - 1) : repeat;
+
+    for(done = 0; done < ww; done += 8, dst += 10) {
+        lo = *in++;
+        hi = *in++;
+        // 76543210 -> 7654432100
+        px[0] = px[1] = lo.i[0];
+        px[2] = lo.i[1];
+        px[3] = lo.i[2];
+        px[4] = lo.i[3];
+        px[5] = px[6] = hi.i[0];
+        px[7] = hi.i[1];
+        px[8] = hi.i[2];
+        px[9] = hi.i[3];
+
+        row = dst;
+        for(line = 0; line < data_rows; line++, row += stride) {
+            for(k = 0; k < 10; k++) row[k] = px[k];
+        }
+        if(blank_last) {
+            for(k = 0; k < 10; k++) row[k] = bb;
+        }
+    }
+}
+
+
+
+/*
+ * Zoom x1.25: draw source line y, columns [xbegin, xend), onto the row
+ * starting at dst.  The width is rounded down to a multiple of 8 source
+ * pixels; nothing is drawn when that leaves no pixels or when there is no
+ * draw surface.  The per-pixel work is done by Scaler_DrawLine().
+ */
+void pVram2RGB_x125_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *Surface = GetDrawSurface();
+    Uint32 *out;
+    Uint32 *in;
+    int width;
+
+    if(Surface == NULL) return;
+
+    width = ((xend - xbegin) / 8) * 8;
+    if(width <= 0) return;
+
+    /* Destination column is the source column scaled by 10/8. */
+    out = (Uint32 *)((Uint8 *)dst + ((xbegin * 10) / 8) * Surface->format->BytesPerPixel);
+    in = &src[xbegin + y * 640];
+    Scaler_DrawLine(out, (Uint32 *)in, width, yrep, Surface->pitch);
+}
+
+
--- /dev/null
+/*
+ * Zoom x1.5
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+
+/*
+ * Expand ww source pixels (processed in groups of 8) to 12 destination
+ * pixels per group (three 4-pixel vectors) by doubling every other source
+ * pixel, and repeat the result over `repeat` rows of `pitch` bytes.
+ * When bFullScan is off and repeat >= 2, the last row is written as opaque
+ * black instead of pixel data.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    v4hi *in = (v4hi *)src;
+    v4hi *row;
+    v4hi lo, hi;
+    v4hi out0, out1, out2;
+    int stride;
+    int data_rows;
+    int blank_last;
+    int done;
+    int line;
+
+    if(repeat <= 0) return;
+
+    stride = pitch / sizeof(v4hi);
+    blank_last = !((bFullScan) || (repeat < 2));
+    data_rows = blank_last ? (repeat - 1) : repeat;
+
+    for(done = 0; done < ww; done += 8, dst += 3) {
+        lo = *in++;
+        hi = *in++;
+        // 76543210 -> 766544322100
+        out0.uv = (v4ui){lo.i[0], lo.i[0], lo.i[1], lo.i[2]};
+        out1.uv = (v4ui){lo.i[2], lo.i[3], hi.i[0], hi.i[0]};
+        out2.uv = (v4ui){hi.i[1], hi.i[2], hi.i[2], hi.i[3]};
+
+        row = dst;
+        for(line = 0; line < data_rows; line++, row += stride) {
+            row[0] = out0;
+            row[1] = out1;
+            row[2] = out2;
+        }
+        if(blank_last) {
+            row[0].uv =
+            row[1].uv =
+            row[2].uv = bb;
+        }
+    }
+}
+
+
+
+/*
+ * Zoom x1.5: draw source line y, columns [xbegin, xend), onto the row
+ * starting at dst.  The width is rounded down to a multiple of 8 source
+ * pixels; nothing is drawn when that leaves no pixels or when there is no
+ * draw surface.  The per-pixel work is done by Scaler_DrawLine().
+ */
+void pVram2RGB_x15_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *Surface = GetDrawSurface();
+    Uint32 *out;
+    Uint32 *in;
+    int width;
+
+    if(Surface == NULL) return;
+
+    width = ((xend - xbegin) / 8) * 8;
+    if(width <= 0) return;
+
+    /* Destination column is the source column scaled by 6/4. */
+    out = (Uint32 *)((Uint8 *)dst + ((xbegin * 6) / 4) * Surface->format->BytesPerPixel);
+    in = &src[xbegin + y * 640];
+    Scaler_DrawLine((v4hi *)out, (Uint32 *)in, width, yrep, Surface->pitch);
+}
+
--- /dev/null
+/*
+ * Zoom x2x2
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-01-26 Move from agar_sdlscaler.cpp
+ */
+
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+/*
+ * Zoom x2: draw source line y, columns [xbegin, xend), at double width
+ * onto the row starting at dst, filling yrep rows (values <= 1 give a
+ * single row).  With two or more rows and bFullScan off, the rows from
+ * yrep / 2 onwards are written as opaque black instead of pixel data.
+ *
+ * Full groups of 8 source pixels are written as four 4-pixel vectors; the
+ * remaining 0..7 pixels are written one by one.  The group loop now covers
+ * complete groups only (its old bound ran one group too far whenever
+ * 2..7 pixels were left over), and the remainder advances two destination
+ * pixels per source pixel and honours yrep and the scan-line blanking.
+ */
+void pVram2RGB_x2_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *Surface = GetDrawSurface();
+    v4hi *b;
+    v4hi *b2p;
+    v4hi b2, b3, b4, b5;
+    v4hi bb;
+    Uint32 *d0;
+    Uint32 *row;
+    Uint32 *bp;
+    Uint32 black;
+    Uint32 pix;
+    unsigned pitch;
+    int w;
+    int ww;
+    int groups;
+    int wodd;
+    int rows;
+    int blank_from;
+    int g;
+    int i;
+    int j;
+
+    if(Surface == NULL) return;
+    w = Surface->w;
+
+    ww = xend - xbegin;
+    if((ww * 2) > w) ww = w / 2;
+    if(ww <= 0) return;
+    groups = ww / 8; /* complete 8-pixel groups */
+    wodd = ww % 8;   /* pixels left after the last complete group */
+
+#if AG_BIG_ENDIAN != 1
+    black = 0xff000000;
+#else
+    black = 0x000000ff;
+#endif
+    bb.i[0] = bb.i[1] = bb.i[2] = bb.i[3] = black;
+
+    rows = (yrep < 1) ? 1 : yrep;
+    /* First row index that is blanked; `rows` means no row is blanked. */
+    blank_from = ((bFullScan) || (rows < 2)) ? rows : (rows >> 1);
+
+    d0 = (Uint32 *)(dst + xbegin * 2 * Surface->format->BytesPerPixel);
+    b = (v4hi *)&src[xbegin + y * 640];
+    pitch = Surface->pitch / sizeof(Uint32);
+
+    // Not thinking align ;-(
+    for(g = 0; g < groups; g++) {
+        b2.i[0] = b2.i[1] = b[0].i[0];
+        b2.i[2] = b2.i[3] = b[0].i[1];
+        b3.i[0] = b3.i[1] = b[0].i[2];
+        b3.i[2] = b3.i[3] = b[0].i[3];
+
+        b4.i[0] = b4.i[1] = b[1].i[0];
+        b4.i[2] = b4.i[3] = b[1].i[1];
+        b5.i[0] = b5.i[1] = b[1].i[2];
+        b5.i[2] = b5.i[3] = b[1].i[3];
+
+        row = d0;
+        for(j = 0; j < rows; j++) {
+            b2p = (v4hi *)row;
+            if(j >= blank_from) {
+                b2p[0] =
+                b2p[1] =
+                b2p[2] =
+                b2p[3] = bb;
+            } else {
+                b2p[0] = b2;
+                b2p[1] = b3;
+                b2p[2] = b4;
+                b2p[3] = b5;
+            }
+            row += pitch;
+        }
+        d0 += 16;
+        b += 2;
+    }
+
+    /* Remainder: one source pixel -> two destination pixels per row. */
+    bp = (Uint32 *)b;
+    for(i = 0; i < wodd; i++) {
+        row = d0;
+        for(j = 0; j < rows; j++) {
+            pix = (j >= blank_from) ? black : *bp;
+            row[0] = pix;
+            row[1] = pix;
+            row += pitch;
+        }
+        d0 += 2;
+        bp++;
+    }
+}
+
--- /dev/null
+/*
+ * Zoom x2.25x2 i.e. 1440x900.
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+/*
+ * Scaler_DrawLine (x2.25): expand source pixels horizontally, 8 source
+ * pixels becoming 18 destination pixels, and replicate the result over
+ * "repeat" destination rows.
+ *
+ * dst    : first destination pixel.
+ * src    : first source pixel; read through v4hi loads, two per group.
+ * ww     : source pixel count, consumed 8 at a time.
+ * repeat : destination rows to write; nothing happens when <= 0.
+ * pitch  : destination row stride in bytes.
+ *
+ * When bFullScan is zero and repeat >= 2, only repeat - 1 rows receive
+ * image data and the last row is filled with the blank constant.
+ */
+static void Scaler_DrawLine(Uint32 *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const Uint32 blank_px = 0xff000000;
+#else
+    const Uint32 blank_px = 0x000000ff;
+#endif
+    v4hi *in = (v4hi *)src;
+    v4hi lo, hi;
+    Uint32 *row;
+    int stride;
+    int rows;
+    int blank_last;
+    int col;
+    int line;
+    int k;
+
+    if(repeat <= 0) return;
+
+    stride = pitch / sizeof(Uint32);
+    blank_last = !((bFullScan) || (repeat < 2));
+    rows = blank_last ? (repeat - 1) : repeat;
+
+    for(col = 0; col < ww; col += 8) {
+        row = dst;
+        lo = *in++;
+        hi = *in++;
+        /* Lanes 0 of each half are tripled, the other lanes doubled. */
+        for(line = 0; line < rows; line++) {
+            row[0] = row[1] = row[2] = lo.i[0];
+            row[3] = row[4] = lo.i[1];
+            row[5] = row[6] = lo.i[2];
+            row[7] = row[8] = lo.i[3];
+            row[9] = row[10] = row[11] = hi.i[0];
+            row[12] = row[13] = hi.i[1];
+            row[14] = row[15] = hi.i[2];
+            row[16] = row[17] = hi.i[3];
+            row += stride;
+        }
+        if(blank_last) {
+            for(k = 0; k < 18; k++) row[k] = blank_px;
+        }
+        dst += 18;
+    }
+}
+
+
+
+/*
+ * pVram2RGB_x225_Line: draw source columns [xbegin, xend) of row y at
+ * 2.25x width onto the current draw surface, over yrep destination rows.
+ *
+ * The span is clamped to half the surface width and rounded down to a
+ * multiple of 8. Nothing is drawn when there is no draw surface or the
+ * resulting span is not positive. Source rows are 640 pixels apart.
+ */
+void pVram2RGB_x225_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint8 *out;
+    int half_width;
+    int span;
+
+    if(surface == NULL) return;
+
+    half_width = surface->w / 2;
+    span = xend - xbegin;
+    if(span > half_width) span = half_width;
+    span -= span % 8;
+    if(span <= 0) return;
+
+    /* 18 destination pixels for every 8 source pixels. */
+    out = dst + ((xbegin * 18) / 8) * surface->format->BytesPerPixel;
+    Scaler_DrawLine((Uint32 *)out, &src[xbegin + y * 640], span, yrep, surface->pitch);
+}
+
+
--- /dev/null
+/*
+ * Zoom x2.5
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+extern void pVram2RGB_x25(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
+
+/*
+ * Scaler_DrawLine (x2.5): expand source pixels horizontally, 8 source
+ * pixels becoming 20 destination pixels (five v4hi stores), and replicate
+ * the result over "repeat" destination rows.
+ *
+ * dst    : first destination vector.
+ * src    : first source pixel; read through v4hi loads, two per group.
+ * ww     : source pixel count, consumed 8 at a time.
+ * repeat : destination rows to write; nothing happens when <= 0.
+ * pitch  : destination row stride in bytes.
+ *
+ * When bFullScan is zero and repeat >= 2, only repeat - 1 rows receive
+ * image data and the last row is filled with the blank constant.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    volatile v4hi o0, o1, o2, o3, o4;
+    v4hi *in = (v4hi *)src;
+    v4hi *row;
+    v4hi lo, hi;
+    int stride;
+    int rows;
+    int blank_last;
+    int col;
+    int line;
+
+    if(repeat <= 0) return;
+
+    stride = pitch / sizeof(v4hi);
+    blank_last = !((bFullScan) || (repeat < 2));
+    rows = blank_last ? (repeat - 1) : repeat;
+
+    for(col = 0; col < ww; col += 8) {
+        row = dst;
+        lo = *in++;
+        hi = *in++;
+        /* Even lanes are written three times, odd lanes twice. */
+        o0.uv = (v4ui){lo.i[0], lo.i[0], lo.i[0], lo.i[1]};
+        o1.uv = (v4ui){lo.i[1], lo.i[2], lo.i[2], lo.i[2]};
+        o2.uv = (v4ui){lo.i[3], lo.i[3], hi.i[0], hi.i[0]};
+        o3.uv = (v4ui){hi.i[0], hi.i[1], hi.i[1], hi.i[2]};
+        o4.uv = (v4ui){hi.i[2], hi.i[2], hi.i[3], hi.i[3]};
+        for(line = 0; line < rows; line++) {
+            row[0] = o0;
+            row[1] = o1;
+            row[2] = o2;
+            row[3] = o3;
+            row[4] = o4;
+            row += stride;
+        }
+        if(blank_last) {
+            row[0].uv = bb;
+            row[1].uv = bb;
+            row[2].uv = bb;
+            row[3].uv = bb;
+            row[4].uv = bb;
+        }
+        dst += 5;
+    }
+}
+
+
+
+/*
+ * pVram2RGB_x25_Line: draw source columns [xbegin, xend) of row y at 2.5x
+ * width onto the current draw surface, over yrep destination rows.
+ *
+ * The span is clamped to half the surface width and rounded down to a
+ * multiple of 8. Nothing is drawn when there is no draw surface or the
+ * resulting span is not positive. Source rows are 640 pixels apart.
+ *
+ * Fix over the previous revision: the destination column offset was
+ * computed as (x * 20) / 16, i.e. 1.25 * x, although Scaler_DrawLine emits
+ * 20 destination pixels per 8 source pixels. A line starting at
+ * xbegin != 0 was therefore drawn at the wrong column. The offset is now
+ * (x * 20) / 8.
+ */
+void pVram2RGB_x25_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *Surface = GetDrawSurface();
+    Uint8 *d1;
+    Uint32 *d2;
+    int ww;
+
+    if(Surface == NULL) return;
+
+    ww = xend - xbegin;
+    if(ww > (Surface->w / 2)) ww = Surface->w / 2;
+    ww = (ww / 8) * 8;
+    if(ww <= 0) return;
+
+    /* 20 destination pixels for every 8 source pixels. */
+    d1 = dst + ((xbegin * 20) / 8) * Surface->format->BytesPerPixel;
+    d2 = &src[xbegin + y * 640];
+    Scaler_DrawLine((v4hi *)d1, d2, ww, yrep, Surface->pitch);
+}
--- /dev/null
+/*
+ * Zoom x3
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+/*
+ * Scaler_DrawLine (x3): expand source pixels horizontally, every source
+ * pixel being written three times (8 source pixels -> six v4hi stores),
+ * and replicate the result over "repeat" destination rows.
+ *
+ * dst    : first destination vector.
+ * src    : first source pixel; read through v4hi loads, two per group.
+ * ww     : source pixel count, consumed 8 at a time.
+ * repeat : destination rows to write; nothing happens when <= 0.
+ * pitch  : destination row stride in bytes.
+ *
+ * When bFullScan is zero and repeat >= 2, only repeat - 1 rows receive
+ * image data and the last row is filled with the blank constant.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    v4hi *in = (v4hi *)src;
+    v4hi *row;
+    v4hi lo, hi;
+    v4hi o0, o1, o2, o3, o4, o5;
+    int stride;
+    int rows;
+    int blank_last;
+    int col;
+    int line;
+
+    if(repeat <= 0) return;
+
+    stride = pitch / sizeof(v4hi);
+    blank_last = !((bFullScan) || (repeat < 2));
+    rows = blank_last ? (repeat - 1) : repeat;
+
+    for(col = 0; col < ww; col += 8) {
+        row = dst;
+        lo = *in++;
+        hi = *in++;
+
+        o0.uv = (v4ui){lo.i[0], lo.i[0], lo.i[0], lo.i[1]};
+        o1.uv = (v4ui){lo.i[1], lo.i[1], lo.i[2], lo.i[2]};
+        o2.uv = (v4ui){lo.i[2], lo.i[3], lo.i[3], lo.i[3]};
+
+        o3.uv = (v4ui){hi.i[0], hi.i[0], hi.i[0], hi.i[1]};
+        o4.uv = (v4ui){hi.i[1], hi.i[1], hi.i[2], hi.i[2]};
+        o5.uv = (v4ui){hi.i[2], hi.i[3], hi.i[3], hi.i[3]};
+
+        for(line = 0; line < rows; line++) {
+            row[0] = o0;
+            row[1] = o1;
+            row[2] = o2;
+            row[3] = o3;
+            row[4] = o4;
+            row[5] = o5;
+            row += stride;
+        }
+        if(blank_last) {
+            row[0].uv = bb;
+            row[1].uv = bb;
+            row[2].uv = bb;
+            row[3].uv = bb;
+            row[4].uv = bb;
+            row[5].uv = bb;
+        }
+        dst += 6;
+    }
+}
+
+
+
+/*
+ * pVram2RGB_x3_Line: draw source columns [xbegin, xend) of row y at triple
+ * width onto the current draw surface, over yrep destination rows.
+ *
+ * The span is rounded down to a multiple of 8. Nothing is drawn when there
+ * is no draw surface or the resulting span is not positive. Source rows
+ * are 640 pixels apart.
+ */
+void pVram2RGB_x3_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint8 *out;
+    int span;
+
+    if(surface == NULL) return;
+
+    span = xend - xbegin;
+    span -= span % 8;
+    if(span <= 0) return;
+
+    out = dst + xbegin * 3 * surface->format->BytesPerPixel;
+    Scaler_DrawLine((v4hi *)out, &src[xbegin + y * 640], span, yrep, surface->pitch);
+}
+
+
--- /dev/null
+/*
+ * Zoom x4x4
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-01-26 Move from agar_sdlscaler.cpp
+ * 2013-09-17 Move from ui-agar/
+ */
+
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+
+
+/*
+ * pVram2RGB_x4_Line: draw one source scanline at four times its width onto
+ * the current draw surface.
+ *
+ * src    : source pixels (Uint32 each); rows are addressed as y * 640.
+ * dst    : byte pointer to the start of the destination row.
+ * xbegin : first source column to draw.
+ * xend   : one past the last source column to draw.
+ * y      : source row.
+ * yrep   : vertical repeat. The row count actually used is yrep >> 4,
+ *          after adding 16 when (y * yrep) is a multiple of 16 and yrep
+ *          itself is not. A count of 0 or 1 draws a single row; otherwise
+ *          rows with index > count / 2 are filled with the "black"
+ *          constant unless bFullScan is non-zero.
+ *
+ * Fix over the previous revision: the tail code that expands the pixels
+ * left after the 8-pixel blocks filled lanes 0, 1, 3 and 4 of a four-lane
+ * vector. Lane 4 is out of bounds and lane 2 kept a stale value. It now
+ * fills lanes 0..3.
+ */
+void pVram2RGB_x4_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *Surface = GetDrawSurface();
+    v4hi *b;
+    v4hi *b2p;
+    v4hi b2, b3, b4, b5, b6, b7, b8, b9;
+    v4hi bb;
+    Uint32 *d0;
+    Uint32 *d1;
+    Uint32 *d2;
+    Uint32 black;
+    unsigned pitch;
+    int w;
+    int ww;
+    int xx;
+    int i;
+    int j;
+    int yrep2;
+
+    if(Surface == NULL) return;
+    w = Surface->w;
+
+    ww = xend - xbegin;
+    if((ww * 4) >= w) ww = w / 4;
+    /*
+     * NOTE(review): after this subtraction the block loops below still run
+     * floor(width / 8) times, but (ww % 8) no longer equals (width % 8).
+     * Confirm the intended tail length.
+     */
+    ww = ww - 7;
+    if(ww <= 0) return;
+
+#if AG_BIG_ENDIAN != 1
+    black = 0xff000000;
+#else
+    black = 0x000000ff;
+#endif
+    yrep2 = yrep;
+    d1 = (Uint32 *)(dst + xbegin * 4 * Surface->format->BytesPerPixel);
+    d2 = &src[xbegin + y * 640];
+
+    pitch = Surface->pitch / sizeof(Uint32); /* row stride in pixels */
+
+    b = (v4hi *)d2;
+    bb.i[0] = bb.i[1] = bb.i[2] = bb.i[3] = black;
+    if((((y * yrep2) % 16) == 0) && ((yrep2 % 16) != 0)) yrep2 += 16;
+    yrep2 >>= 4;
+    switch(yrep2) {
+    case 0:
+    case 1:
+        /* Single destination row. */
+        for(xx = 0; xx < ww; xx += 8) {
+            b2p = (v4hi *)d1;
+            b2.i[0] = b2.i[1] = b2.i[2] = b2.i[3] = b[0].i[0];
+            b3.i[0] = b3.i[1] = b3.i[2] = b3.i[3] = b[0].i[1];
+            b4.i[0] = b4.i[1] = b4.i[2] = b4.i[3] = b[0].i[2];
+            b5.i[0] = b5.i[1] = b5.i[2] = b5.i[3] = b[0].i[3];
+
+            b6.i[0] = b6.i[1] = b6.i[2] = b6.i[3] = b[1].i[0];
+            b7.i[0] = b7.i[1] = b7.i[2] = b7.i[3] = b[1].i[1];
+            b8.i[0] = b8.i[1] = b8.i[2] = b8.i[3] = b[1].i[2];
+            b9.i[0] = b9.i[1] = b9.i[2] = b9.i[3] = b[1].i[3];
+
+            b2p[0] = b2;
+            b2p[1] = b3;
+            b2p[2] = b4;
+            b2p[3] = b5;
+            b2p[4] = b6;
+            b2p[5] = b7;
+            b2p[6] = b8;
+            b2p[7] = b9;
+            d1 += 32;
+            b += 2;
+        }
+        if((ww % 8) != 0) {
+            d0 = (Uint32 *)b;
+            b2p = (v4hi *)d1;
+            for(j = 0; j < (ww % 8); j++) {
+                /* One source pixel fills all four lanes. */
+                b2.i[0] = b2.i[1] = b2.i[2] = b2.i[3] = *d0;
+                *b2p = b2;
+                d0++;
+                b2p++;
+            }
+        }
+        break;
+    default:
+        d0 = d1;
+        for(xx = 0; xx < ww; xx += 8) {
+            d1 = d0;
+            b2.i[0] = b2.i[1] = b2.i[2] = b2.i[3] = b[0].i[0];
+            b3.i[0] = b3.i[1] = b3.i[2] = b3.i[3] = b[0].i[1];
+            b4.i[0] = b4.i[1] = b4.i[2] = b4.i[3] = b[0].i[2];
+            b5.i[0] = b5.i[1] = b5.i[2] = b5.i[3] = b[0].i[3];
+
+            b6.i[0] = b6.i[1] = b6.i[2] = b6.i[3] = b[1].i[0];
+            b7.i[0] = b7.i[1] = b7.i[2] = b7.i[3] = b[1].i[1];
+            b8.i[0] = b8.i[1] = b8.i[2] = b8.i[3] = b[1].i[2];
+            b9.i[0] = b9.i[1] = b9.i[2] = b9.i[3] = b[1].i[3];
+
+            for(j = 0; j < yrep2; j++) {
+                b2p = (v4hi *)d1;
+                if(!bFullScan && (j > (yrep2 >> 1))) {
+                    b2p[0] = bb;
+                    b2p[1] = bb;
+                    b2p[2] = bb;
+                    b2p[3] = bb;
+                    b2p[4] = bb;
+                    b2p[5] = bb;
+                    b2p[6] = bb;
+                    b2p[7] = bb;
+                } else {
+                    b2p[0] = b2;
+                    b2p[1] = b3;
+                    b2p[2] = b4;
+                    b2p[3] = b5;
+                    b2p[4] = b6;
+                    b2p[5] = b7;
+                    b2p[6] = b8;
+                    b2p[7] = b9;
+                }
+                d1 += pitch;
+            }
+            d0 += 32;
+            b += 2;
+        }
+        if((ww % 8) != 0) {
+            d2 = (Uint32 *)b;
+            /*
+             * NOTE(review): d1 was advanced row by row in the last block,
+             * so the tail starts yrep2 rows below the blocks. The tail
+             * also uses yrep2 >> 1 rows and tests the column index j in
+             * the blanking condition. Behaviour is kept as it was; confirm
+             * what was intended.
+             */
+            d0 = d1;
+            for(j = 0; j < (ww % 8); j++) {
+                d1 = d0;
+                /* One source pixel fills all four lanes. */
+                b2.i[0] = b2.i[1] = b2.i[2] = b2.i[3] = *d2;
+                for(i = 0; i < (yrep2 >> 1); i++) {
+                    b2p = (v4hi *)d1;
+                    if(!bFullScan && (j > (yrep2 >> 2))) {
+                        *b2p = bb;
+                    } else {
+                        *b2p = b2;
+                    }
+                    d1 += pitch;
+                }
+                d0 += 4;
+                d2++;
+            }
+        }
+        break;
+    }
+}
+
--- /dev/null
+/*
+ * Zoom x4.5
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+
+/*
+ * Scaler_DrawLine (x4.5): expand source pixels horizontally, 8 source
+ * pixels becoming 36 destination pixels (nine v4hi stores), and replicate
+ * the result over "repeat" destination rows.
+ *
+ * dst    : first destination vector.
+ * src    : first source pixel; read through v4hi loads, two per group.
+ * ww     : source pixel count, consumed 8 at a time.
+ * repeat : destination rows to write; nothing happens when <= 0.
+ * pitch  : destination row stride in bytes.
+ *
+ * When bFullScan is zero and repeat >= 2, only repeat - 1 rows receive
+ * image data and the last row is filled with the blank constant.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    v4hi *in = (v4hi *)src;
+    v4hi *row;
+    v4hi lo, hi;
+    v4hi out[9];
+    int stride;
+    int rows;
+    int blank_last;
+    int col;
+    int line;
+    int k;
+
+    if(repeat <= 0) return;
+
+    stride = pitch / sizeof(v4hi);
+    blank_last = !((bFullScan) || (repeat < 2));
+    rows = blank_last ? (repeat - 1) : repeat;
+
+    for(col = 0; col < ww; col += 8) {
+        row = dst;
+        lo = *in++;
+        hi = *in++;
+
+        /* Even lanes are written five times, odd lanes four times. */
+        out[0].uv = (v4ui){lo.i[0], lo.i[0], lo.i[0], lo.i[0]};
+        out[1].uv = (v4ui){lo.i[0], lo.i[1], lo.i[1], lo.i[1]};
+        out[2].uv = (v4ui){lo.i[1], lo.i[2], lo.i[2], lo.i[2]};
+        out[3].uv = (v4ui){lo.i[2], lo.i[2], lo.i[3], lo.i[3]};
+        out[4].uv = (v4ui){lo.i[3], lo.i[3], hi.i[0], hi.i[0]};
+
+        out[5].uv = (v4ui){hi.i[0], hi.i[0], hi.i[0], hi.i[1]};
+        out[6].uv = (v4ui){hi.i[1], hi.i[1], hi.i[1], hi.i[2]};
+        out[7].uv = (v4ui){hi.i[2], hi.i[2], hi.i[2], hi.i[2]};
+        out[8].uv = (v4ui){hi.i[3], hi.i[3], hi.i[3], hi.i[3]};
+
+        for(line = 0; line < rows; line++) {
+            for(k = 0; k < 9; k++) row[k] = out[k];
+            row += stride;
+        }
+        if(blank_last) {
+            for(k = 0; k < 9; k++) row[k].uv = bb;
+        }
+        dst += 9;
+    }
+}
+
+
+
+/*
+ * pVram2RGB_x45_Line: draw source columns [xbegin, xend) of row y at 4.5x
+ * width onto the current draw surface, over yrep destination rows.
+ *
+ * The span is rounded down to a multiple of 8. Nothing is drawn when there
+ * is no draw surface or the resulting span is not positive. Source rows
+ * are 640 pixels apart.
+ */
+void pVram2RGB_x45_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint8 *out;
+    int span;
+
+    if(surface == NULL) return;
+
+    span = xend - xbegin;
+    span -= span % 8;
+    if(span <= 0) return;
+
+    /* 18 destination pixels for every 4 source pixels. */
+    out = dst + ((xbegin * 18) / 4) * surface->format->BytesPerPixel;
+    Scaler_DrawLine((v4hi *)out, &src[xbegin + y * 640], span, yrep, surface->pitch);
+}
+
--- /dev/null
+/*
+ * Zoom x5
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+
+/*
+ * Scaler_DrawLine (x5): expand source pixels horizontally, every source
+ * pixel being written five times (8 source pixels -> ten v4hi stores),
+ * and replicate the result over "repeat" destination rows.
+ *
+ * dst    : first destination vector.
+ * src    : first source pixel; read through v4hi loads, two per group.
+ * ww     : source pixel count, consumed 8 at a time.
+ * repeat : destination rows to write; nothing happens when <= 0.
+ * pitch  : destination row stride in bytes.
+ *
+ * When bFullScan is zero and repeat >= 2, only repeat - 1 rows receive
+ * image data and the last row is filled with the blank constant.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    v4hi *in = (v4hi *)src;
+    v4hi *row;
+    v4hi lo, hi;
+    v4hi out[10];
+    int stride;
+    int rows;
+    int blank_last;
+    int col;
+    int line;
+    int k;
+
+    if(repeat <= 0) return;
+
+    stride = pitch / sizeof(v4hi);
+    blank_last = !((bFullScan) || (repeat < 2));
+    rows = blank_last ? (repeat - 1) : repeat;
+
+    for(col = 0; col < ww; col += 8) {
+        row = dst;
+        lo = *in++;
+        hi = *in++;
+
+        out[0].uv = (v4ui){lo.i[0], lo.i[0], lo.i[0], lo.i[0]};
+        out[1].uv = (v4ui){lo.i[0], lo.i[1], lo.i[1], lo.i[1]};
+        out[2].uv = (v4ui){lo.i[1], lo.i[1], lo.i[2], lo.i[2]};
+        out[3].uv = (v4ui){lo.i[2], lo.i[2], lo.i[2], lo.i[3]};
+        out[4].uv = (v4ui){lo.i[3], lo.i[3], lo.i[3], lo.i[3]};
+
+        out[5].uv = (v4ui){hi.i[0], hi.i[0], hi.i[0], hi.i[0]};
+        out[6].uv = (v4ui){hi.i[0], hi.i[1], hi.i[1], hi.i[1]};
+        out[7].uv = (v4ui){hi.i[1], hi.i[1], hi.i[2], hi.i[2]};
+        out[8].uv = (v4ui){hi.i[2], hi.i[2], hi.i[2], hi.i[3]};
+        out[9].uv = (v4ui){hi.i[3], hi.i[3], hi.i[3], hi.i[3]};
+
+        for(line = 0; line < rows; line++) {
+            for(k = 0; k < 10; k++) row[k] = out[k];
+            row += stride;
+        }
+        if(blank_last) {
+            for(k = 0; k < 10; k++) row[k].uv = bb;
+        }
+        dst += 10;
+    }
+}
+
+
+
+/*
+ * pVram2RGB_x5_Line: draw source columns [xbegin, xend) of row y at five
+ * times their width onto the current draw surface, over yrep destination
+ * rows.
+ *
+ * The span is rounded down to a multiple of 8. Nothing is drawn when there
+ * is no draw surface or the resulting span is not positive. Source rows
+ * are 640 pixels apart.
+ */
+void pVram2RGB_x5_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint8 *out;
+    int span;
+
+    if(surface == NULL) return;
+
+    span = xend - xbegin;
+    span -= span % 8;
+    if(span <= 0) return;
+
+    out = dst + xbegin * 5 * surface->format->BytesPerPixel;
+    Scaler_DrawLine((v4hi *)out, &src[xbegin + y * 640], span, yrep, surface->pitch);
+}
--- /dev/null
+/*
+ * Zoom x6
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+/*
+ * Scaler_DrawLine (x6): expand source pixels horizontally, every source
+ * pixel being written six times (8 source pixels -> twelve v4hi stores),
+ * and replicate the result over "repeat" destination rows.
+ *
+ * dst    : first destination vector.
+ * src    : first source pixel; read through v4hi loads, two per group.
+ * ww     : source pixel count, consumed 8 at a time.
+ * repeat : destination rows to write; nothing happens when <= 0.
+ * pitch  : destination row stride in bytes.
+ *
+ * When bFullScan is zero and repeat >= 2, only repeat - 1 rows receive
+ * image data and the last row is filled with the blank constant.
+ *
+ * Fix over the previous revision: in the bFullScan / single-row path the
+ * destination advanced by 10 vectors per group although 12 were written,
+ * so each group overwrote the last two vectors of the previous one. Both
+ * paths now share one loop that advances by 12.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    v4hi *b = (v4hi *)src;
+    v4hi *b2p;
+    v4hi r1, r2;
+    v4hi out[12];
+    int pitch2;
+    int rows;
+    int blank_last;
+    int xx;
+    int yy;
+    int k;
+
+    if(repeat <= 0) return;
+
+    pitch2 = pitch / sizeof(v4hi);
+    /* The last row is blanked only for non-full-scan output with >= 2 rows. */
+    blank_last = !((bFullScan) || (repeat < 2));
+    rows = blank_last ? (repeat - 1) : repeat;
+
+    for(xx = 0; xx < ww; xx += 8) {
+        b2p = dst;
+        r1 = *b++;
+        r2 = *b++;
+
+        out[0].uv  = (v4ui){r1.i[0], r1.i[0], r1.i[0], r1.i[0]};
+        out[1].uv  = (v4ui){r1.i[0], r1.i[0], r1.i[1], r1.i[1]};
+        out[2].uv  = (v4ui){r1.i[1], r1.i[1], r1.i[1], r1.i[1]};
+        out[3].uv  = (v4ui){r1.i[2], r1.i[2], r1.i[2], r1.i[2]};
+        out[4].uv  = (v4ui){r1.i[2], r1.i[2], r1.i[3], r1.i[3]};
+        out[5].uv  = (v4ui){r1.i[3], r1.i[3], r1.i[3], r1.i[3]};
+
+        out[6].uv  = (v4ui){r2.i[0], r2.i[0], r2.i[0], r2.i[0]};
+        out[7].uv  = (v4ui){r2.i[0], r2.i[0], r2.i[1], r2.i[1]};
+        out[8].uv  = (v4ui){r2.i[1], r2.i[1], r2.i[1], r2.i[1]};
+        out[9].uv  = (v4ui){r2.i[2], r2.i[2], r2.i[2], r2.i[2]};
+        out[10].uv = (v4ui){r2.i[2], r2.i[2], r2.i[3], r2.i[3]};
+        out[11].uv = (v4ui){r2.i[3], r2.i[3], r2.i[3], r2.i[3]};
+
+        for(yy = 0; yy < rows; yy++) {
+            for(k = 0; k < 12; k++) b2p[k] = out[k];
+            b2p = b2p + pitch2;
+        }
+        if(blank_last) {
+            for(k = 0; k < 12; k++) b2p[k].uv = bb;
+        }
+        dst += 12; /* 12 vectors == 48 destination pixels per 8 source pixels */
+    }
+}
+
+
+
+/*
+ * pVram2RGB_x6_Line: draw source columns [xbegin, xend) of row y at six
+ * times their width onto the current draw surface, over yrep destination
+ * rows.
+ *
+ * The span is rounded down to a multiple of 8. Nothing is drawn when there
+ * is no draw surface or the resulting span is not positive. Source rows
+ * are 640 pixels apart.
+ */
+void pVram2RGB_x6_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint8 *out;
+    int span;
+
+    if(surface == NULL) return;
+
+    span = xend - xbegin;
+    span -= span % 8;
+    if(span <= 0) return;
+
+    out = dst + xbegin * 6 * surface->format->BytesPerPixel;
+    Scaler_DrawLine((v4hi *)out, &src[xbegin + y * 640], span, yrep, surface->pitch);
+}
+
+
--- /dev/null
+# Build script for the SSE2 variants of the line scalers.
+# Prints a progress line when this directory is configured.
+message("* ui-agar/scaler/sse2")
+
+# Earlier way of passing the SIMD flags; superseded by add_compile_options() below.
+#set(CMAKE_BUILD_SETTING_C_FLAGS "${CMAKE_C_FLAGS} -msse2 -msse -mmmx")
+# Enable MMX/SSE/SSE2 code generation. This must stay ahead of add_library()
+# so that the options apply to the target declared below.
+add_compile_options(-msse2 -msse -mmmx)
+# One source file per zoom factor (x1, x1.25, x1.5, x2, x2.25, x2.5, x3, x4, x4.5, x5, x6).
+add_library(xm7_scaler-sse2
+ scaler_x1_sse2.c
+ scaler_x125_sse2.c
+ scaler_x15_sse2.c
+ scaler_x2_sse2.c
+ scaler_x225_sse2.c
+ scaler_x25_sse2.c
+ scaler_x3_sse2.c
+ scaler_x4_sse2.c
+ scaler_x45_sse2.c
+ scaler_x5_sse2.c
+ scaler_x6_sse2.c
+)
--- /dev/null
+/*
+ * Zoom x1.25x2 i.e. 800x480.
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+extern void pVram2RGB_x125(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
+
+#if defined(__SSE2__)
+
+/*
+ * Scaler_DrawLine (x1.25): expand source pixels horizontally, 8 source
+ * pixels becoming 10 destination pixels, and replicate the result over
+ * "repeat" destination rows.
+ *
+ * dst    : first destination pixel.
+ * src    : first source pixel; read through v4hi loads, two per group.
+ * ww     : source pixel count, consumed 8 at a time.
+ * repeat : destination rows to write; nothing happens when <= 0.
+ * pitch  : destination row stride in bytes.
+ *
+ * When bFullScan is zero and repeat >= 2, only repeat - 1 rows receive
+ * image data and the last row is filled with the blank constant.
+ */
+static void Scaler_DrawLine(Uint32 *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const Uint32 blank_px = 0xff000000;
+#else
+    const Uint32 blank_px = 0x000000ff;
+#endif
+    v4hi *in = (v4hi *)src;
+    v4hi lo, hi;
+    Uint32 *row;
+    int stride;
+    int rows;
+    int blank_last;
+    int col;
+    int line;
+    int k;
+
+    if(repeat <= 0) return;
+
+    stride = pitch / sizeof(Uint32);
+    blank_last = !((bFullScan) || (repeat < 2));
+    rows = blank_last ? (repeat - 1) : repeat;
+
+    for(col = 0; col < ww; col += 8) {
+        row = dst;
+        lo = *in++;
+        hi = *in++;
+        /* Lane 0 of each half is doubled, the other lanes copied once. */
+        for(line = 0; line < rows; line++) {
+            row[0] = row[1] = lo.i[0];
+            row[2] = lo.i[1];
+            row[3] = lo.i[2];
+            row[4] = lo.i[3];
+            row[5] = row[6] = hi.i[0];
+            row[7] = hi.i[1];
+            row[8] = hi.i[2];
+            row[9] = hi.i[3];
+            row += stride;
+        }
+        if(blank_last) {
+            for(k = 0; k < 10; k++) row[k] = blank_px;
+        }
+        dst += 10;
+    }
+}
+
+
+
+/*
+ * pVram2RGB_x125_Line_SSE2: draw source columns [xbegin, xend) of row y at
+ * 1.25x width onto the current draw surface, over yrep destination rows.
+ *
+ * The span is rounded down to a multiple of 8. Nothing is drawn when there
+ * is no draw surface or the resulting span is not positive. Source rows
+ * are 640 pixels apart.
+ */
+void pVram2RGB_x125_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint8 *out;
+    int span;
+
+    if(surface == NULL) return;
+
+    span = xend - xbegin;
+    span -= span % 8;
+    if(span <= 0) return;
+
+    /* 10 destination pixels for every 8 source pixels. */
+    out = dst + ((xbegin * 10) / 8) * surface->format->BytesPerPixel;
+    Scaler_DrawLine((Uint32 *)out, &src[xbegin + y * 640], span, yrep, surface->pitch);
+}
+
+
+#else
+
+/*
+ * Fallback for builds without SSE2: keep the _SSE2 entry point available
+ * and forward to the generic x1.25 line scaler.
+ *
+ * The previous revision lacked the dst parameter and referred to the
+ * undeclared names "dst" and "x", so it could not compile and did not match
+ * the signature of the SSE2 variant above.
+ *
+ * NOTE(review): the prototype below assumes the generic routine takes the
+ * same (src, dst, xbegin, xend, y, yrep) arguments as the other *_Line
+ * scalers; confirm against its definition.
+ */
+extern void pVram2RGB_x125_Line(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep);
+
+void pVram2RGB_x125_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    pVram2RGB_x125_Line(src, dst, xbegin, xend, y, yrep);
+}
+
+#endif // __SSE2__
\ No newline at end of file
--- /dev/null
+/*
+ * Zoom x1.5
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+extern void pVram2RGB_x15_Line(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
+
+#if defined(__SSE2__)
+/*
+ * Scaler_DrawLine (x1.5): expand source pixels horizontally, 8 source
+ * pixels becoming 12 destination pixels (three v4hi stores), and replicate
+ * the result over "repeat" destination rows.
+ *
+ * dst    : first destination vector.
+ * src    : first source pixel; read through v4hi loads, two per group.
+ * ww     : source pixel count, consumed 8 at a time.
+ * repeat : destination rows to write; nothing happens when <= 0.
+ * pitch  : destination row stride in bytes.
+ *
+ * When bFullScan is zero and repeat >= 2, only repeat - 1 rows receive
+ * image data and the last row is filled with the blank constant.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    v4hi *in = (v4hi *)src;
+    v4hi *row;
+    v4hi lo, hi;
+    v4hi o0, o1, o2;
+    int stride;
+    int rows;
+    int blank_last;
+    int col;
+    int line;
+
+    if(repeat <= 0) return;
+
+    stride = pitch / sizeof(v4hi);
+    blank_last = !((bFullScan) || (repeat < 2));
+    rows = blank_last ? (repeat - 1) : repeat;
+
+    for(col = 0; col < ww; col += 8) {
+        row = dst;
+        lo = *in++;
+        hi = *in++;
+        /* Even lanes are written twice, odd lanes once. */
+        o0.uv = (v4ui){lo.i[0], lo.i[0], lo.i[1], lo.i[2]};
+        o1.uv = (v4ui){lo.i[2], lo.i[3], hi.i[0], hi.i[0]};
+        o2.uv = (v4ui){hi.i[1], hi.i[2], hi.i[2], hi.i[3]};
+        for(line = 0; line < rows; line++) {
+            row[0] = o0;
+            row[1] = o1;
+            row[2] = o2;
+            row += stride;
+        }
+        if(blank_last) {
+            row[0].uv = bb;
+            row[1].uv = bb;
+            row[2].uv = bb;
+        }
+        dst += 3;
+    }
+}
+
+
+
+/*
+ * pVram2RGB_x15_Line_SSE2: draw source columns [xbegin, xend) of row y at
+ * 1.5x width onto the current draw surface, over yrep destination rows.
+ *
+ * The span is rounded down to a multiple of 8. Nothing is drawn when there
+ * is no draw surface or the resulting span is not positive. Source rows
+ * are 640 pixels apart.
+ */
+void pVram2RGB_x15_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint8 *out;
+    int span;
+
+    if(surface == NULL) return;
+
+    span = xend - xbegin;
+    span -= span % 8;
+    if(span <= 0) return;
+
+    /* 6 destination pixels for every 4 source pixels. */
+    out = dst + ((xbegin * 6) / 4) * surface->format->BytesPerPixel;
+    Scaler_DrawLine((v4hi *)out, &src[xbegin + y * 640], span, yrep, surface->pitch);
+}
+
+
+#else
+
+/*
+ * Fallback for builds without SSE2: keep the x1.5 _SSE2 entry point
+ * available and forward to the generic x1.5 line scaler.
+ *
+ * The previous revision was a copy of the x3 fallback: it defined
+ * pVram2RGB_x3_Line_SSE2 (the symbol name used by the x3 scaler), called
+ * pVram2RGB_x3_Line, lacked the dst parameter and referred to the
+ * undeclared names "dst" and "x".
+ *
+ * NOTE(review): the call follows the pVram2RGB_x15_Line prototype declared
+ * at the top of this file, which has no xend parameter and takes a
+ * Uint32 * destination. If the generic routine really takes
+ * (src, dst, xbegin, xend, y, yrep) like the other *_Line scalers, update
+ * that prototype and pass xend here.
+ */
+void pVram2RGB_x15_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    (void)xend; /* not accepted by the prototype visible in this file */
+    pVram2RGB_x15_Line(src, (Uint32 *)dst, xbegin, y, yrep);
+}
+
+#endif // __SSE2__
\ No newline at end of file
--- /dev/null
+/*
+ * Zoom x1x1
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-01-26 Move from agar_sdlscaler.cpp
+ */
+
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+
+/*
+ * Scaler_DrawLine (x1): copy source pixels unscaled, 8 at a time (two v4hi
+ * per group), and replicate them over "repeat" destination rows.
+ *
+ * dst    : first destination vector.
+ * src    : first source pixel; read through v4hi loads.
+ * ww     : source pixel count, consumed 8 at a time.
+ * repeat : destination rows to write. Negative values draw nothing;
+ *          0 and 1 both copy a single row.
+ * pitch  : destination row stride in bytes.
+ *
+ * When bFullScan is zero and repeat >= 2, only repeat - 1 rows receive
+ * image data and the last row is filled with the blank constant.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    v4hi *in;
+    v4hi *row;
+    v4hi lo, hi;
+    v4hi blank;
+    int stride;
+    int col;
+    int line;
+
+    if(__builtin_expect((repeat < 0), 0)) return;
+
+    in = (v4hi *)src;
+    blank.uv = bb;
+    stride = pitch / sizeof(v4hi);
+
+    if(!bFullScan && (repeat >= 2)) {
+        /* Image rows followed by one blank row. */
+        for(col = 0; col < ww; col += 8) {
+            row = dst;
+            lo = in[0];
+            hi = in[1];
+            for(line = 0; line < repeat - 1; line++) {
+                row[0] = lo;
+                row[1] = hi;
+                row += stride;
+            }
+            row[0] = blank;
+            row[1] = blank;
+            dst += 2;
+            in += 2;
+        }
+        return;
+    }
+
+    if(__builtin_expect((repeat >= 2), 1)) {
+        /* Full scan: every row carries image data. */
+        for(col = 0; col < ww; col += 8) {
+            row = dst;
+            lo = in[0];
+            hi = in[1];
+            for(line = 0; line < repeat; line++) {
+                row[0] = lo;
+                row[1] = hi;
+                row += stride;
+            }
+            dst += 2;
+            in += 2;
+        }
+    } else {
+        /* repeat is 0 or 1: plain copy of a single row. */
+        for(col = 0; col < ww; col += 8) {
+            dst[0] = in[0];
+            dst[1] = in[1];
+            dst += 2;
+            in += 2;
+        }
+    }
+}
+
+
+
+
+/*
+ * Draw source line `y`, columns [xbegin, xend), unscaled into the buffer
+ * at `dst`, writing `yrep` output rows.
+ *
+ *  src  : source pixels, 640 per line
+ *  dst  : start of the destination row; the column offset is added here
+ *  yrep : row count handed to Scaler_DrawLine()
+ *
+ * Nothing is drawn when no draw surface is available or the span is empty.
+ */
+void pVram2RGB_x1_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint32 *out;
+    Uint32 *in;
+    int span;
+
+    if(surface == NULL) return;
+
+    span = xend - xbegin;
+    if(span <= 0) return;
+
+    out = (Uint32 *)(dst + xbegin * surface->format->BytesPerPixel);
+    in = &src[xbegin + y * 640];
+
+    Scaler_DrawLine((v4hi *)out, in, span, yrep, surface->pitch);
+}
+
--- /dev/null
+/*
+ * Zoom x2.25x2 i.e. 1440x900.
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+extern void pVram2RGB_x225(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
+
+#if defined(__SSE2__)
+
+/*
+ * Expand one line of 32-bit pixels horizontally by 2.25 (every 8 source
+ * pixels become 18 output pixels) and write it to the output rows.
+ *
+ *  dst    : first destination pixel of the top output row
+ *  src    : first source pixel
+ *  ww     : number of source pixels; the caller passes a positive multiple of 8
+ *  repeat : number of output rows; <= 0 draws nothing
+ *  pitch  : distance in bytes between two output rows
+ *
+ * With repeat >= 2 and bFullScan off, repeat - 1 rows receive pixel data
+ * and one further row is filled with the black constant.
+ */
+static void Scaler_DrawLine(Uint32 *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+    int xx;
+    int yy;
+    int yrep2;   /* rows that receive pixel data */
+    int yrep3;   /* trailing black rows (0 or 1) */
+    v2hi *b2p;
+    register v2hi r1, r2, r3, r4;
+    v2hi r5v[(640 * 9) / 8 + 1];   /* one expanded line, 9 v2hi per 8 source pixels */
+    register v2hi *b;
+    int pitch2;
+    int ip = 0;
+#if AG_BIG_ENDIAN != 1
+    const v2ui bb = (v2ui){0xff000000, 0xff000000};
+#else
+    const v2ui bb = (v2ui){0x000000ff, 0x000000ff};
+#endif
+
+    if(repeat <= 0) return;
+    b = (v2hi *)src;
+    pitch2 = pitch / sizeof(v2hi);
+
+    _prefetch_data_write_l1(r5v, sizeof(r5v));
+    if((bFullScan) || (repeat < 2)) {
+        yrep2 = repeat;
+        yrep3 = 0;
+    } else {
+        // 76543210 -> 776655444332211000
+        yrep2 = repeat - 1;
+        if(yrep2 < 1) {
+            yrep2 = 1;
+            yrep3 = 0;
+        } else {
+            yrep3 = 1;
+        }
+    }
+#ifndef __x86_64__ /* ia32 etc */
+    // 76543210 -> 776655444332211000
+    _prefetch_data_write_l1(r5v, sizeof(r5v));
+    for(xx = 0; xx < ww; xx += 8) {
+        r1 = b[0];
+        r2 = b[1];
+        r3 = b[2];
+        r4 = b[3];
+        r5v[ip + 0].uv = (v2ui){r1.i[0], r1.i[0]}; //00
+        r5v[ip + 1].uv = (v2ui){r1.i[0], r1.i[1]}; //01
+
+        r5v[ip + 2].uv = (v2ui){r1.i[1], r2.i[0]}; //12
+        r5v[ip + 3].uv = (v2ui){r2.i[0], r2.i[1]}; //23
+
+        r5v[ip + 4].uv = (v2ui){r2.i[1], r3.i[0]}; //34
+        r5v[ip + 5].uv = (v2ui){r3.i[0], r3.i[0]}; //44
+        r5v[ip + 6].uv = (v2ui){r3.i[1], r3.i[1]}; //55
+
+        r5v[ip + 7].uv = (v2ui){r4.i[0], r4.i[0]}; //66
+        r5v[ip + 8].uv = (v2ui){r4.i[1], r4.i[1]}; //77
+        ip += 9;
+        b += 4;
+    }
+    b2p = (v2hi *)dst;
+    //_prefetch_data_read_l1(r5v, sizeof(r5v));
+    for(yy = 0; yy < yrep2; yy++) {
+        /*
+         * Copy the whole expanded line: ip v2hi entries. The previous
+         * length, ww * sizeof(Uint32), covered only the unscaled width
+         * and left the right part of every row unwritten.
+         */
+        memcpy((void *)b2p, (void *)r5v, ip * sizeof(v2hi));
+        b2p = b2p + pitch2;
+    }
+    for(yy = 0; yy < yrep3; yy++) {
+        for(xx = 0; xx < ip; xx++) b2p[xx].uv = bb;
+    }
+#else /* defined(__x86_64__) */
+    /* x86_64 : Using assembly. */
+    /*
+     * NOTE(review): the asm stores to memory but lists no "memory" clobber,
+     * and "movq %r12, %xmm6" presumably sets only the low 64 bits of xmm6,
+     * so the 16-byte black stores may alternate black and zero pixels --
+     * confirm before relying on the black row contents.
+     */
+    Uint32 *p;
+    p = dst;
+#if 0
+    for(yy = 0; yy < repeat; yy++) {
+        _prefetch_data_write_l1(p, ww * sizeof(Uint32));
+        p += (pitch / sizeof(Uint32));
+    }
+#endif
+    asm volatile (
+        "movl %[ww], %%edx\n\t"
+        "shr $3, %%edx\n\t"
+        "movq %[src], %%rsi\n\t"
+        "movq %[dst], %%rdi\n\t"
+        "movl %[pitch], %%r11d\n\t"
+        "movl %[rep], %%r9d\n\t"
+        "movl %[rep2], %%r10d\n\t"
+        "_l3:\n\t"
+        "movq %%rdi, %%r8\n\t"
+        "movdqu 0(%%rsi), %%xmm0\n\t"
+        "movdqu 16(%%rsi), %%xmm5\n\t"
+        "pshufd $0b11111010 ,%%xmm0, %%xmm1\n\t"
+        "pshufd $0b01010000 ,%%xmm0, %%xmm2\n\t"
+        "movd %%xmm0, %%eax\n\t"
+
+        "pshufd $0b11111010, %%xmm5, %%xmm3\n\t"
+        "pshufd $0b01010000, %%xmm5, %%xmm4\n\t"
+        "movd %%xmm5, %%ebx\n\t"
+
+        "movl %%r9d, %%ecx\n\t"
+        "movq %%r8, %%rdi\n"
+        "_l4:\n\t"
+        "movl %%eax, 0(%%rdi)\n\t"
+        "movdqu %%xmm2, 4(%%rdi)\n\t"
+        "movdqu %%xmm1, 20(%%rdi)\n\t"
+        "movl %%ebx, 36(%%rdi)\n\t"
+        "movdqu %%xmm4, 40(%%rdi)\n\t"
+        "movdqu %%xmm3, 56(%%rdi)\n\t"
+        "addq %%r11, %%rdi\n\t"
+        "dec %%ecx\n\t"
+        "jnz _l4\n\t"
+        "movl %%r10d, %%ecx\n\t"
+        "cmpl $0x00000000, %%ecx\n\t"
+        "jz _l5\n\t"
+        "_l6:\n\t"
+        "movq $0xff000000ff000000, %%r12\n\t"
+        "movq %%r12, %%xmm6\n\t"
+        "movdqu %%xmm6, 0(%%rdi)\n\t"
+        "movdqu %%xmm6, 16(%%rdi)\n\t"
+        "movdqu %%xmm6, 32(%%rdi)\n\t"
+        "movdqu %%xmm6, 48(%%rdi)\n\t"
+        "movq %%r12, 64(%%rdi)\n\t"
+        "dec %%ecx\n\t"
+        "jnz _l6\n\t"
+        "_l5:\n\t"
+        "addq $32, %%rsi\n\t"
+        "movq %%r8, %%rdi\n\t"
+        "addq $72, %%rdi\n\t"
+        "dec %%edx\n\t"
+        "jnz _l3\n\t"
+        :
+        : [pitch] "rm"(pitch), [ww]"rm" (ww),
+          [rep] "rm"(yrep2),[rep2] "rm"(yrep3),
+          [src] "rm" (src), [dst] "rm" (dst)
+        : "eax","ebx","rdi", "rsi", "ecx", "edx",
+          "r8", "r9", "r10", "r11", "r12",
+          "xmm0","xmm1","xmm2",
+          "xmm3","xmm4", "xmm5", "xmm6");
+
+#endif
+}
+
+
+
+
+/*
+ * Draw source line `y`, columns [xbegin, xend), scaled 2.25x horizontally
+ * into the buffer at `dst`, writing `yrep` output rows.
+ *
+ * The span is limited to half the draw-surface width and rounded down to a
+ * multiple of 8 pixels. Nothing is drawn when no draw surface is available
+ * or the resulting span is empty.
+ */
+void pVram2RGB_x225_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint32 *out;
+    Uint32 *in;
+    int surface_w;
+    int span;
+
+    if(surface == NULL) return;
+    surface_w = surface->w;
+
+    span = xend - xbegin;
+    if(span > (surface_w / 2)) span = surface_w / 2;
+    span = (span / 8) * 8;
+    if(span <= 0) return;
+
+    /* 18 output pixels for every 8 source pixels. */
+    out = (Uint32 *)((Uint8 *)dst + ((xbegin * 18) / 8) * surface->format->BytesPerPixel);
+    in = &src[xbegin + y * 640];
+    Scaler_DrawLine(out, in, span, yrep, surface->pitch);
+}
+
+
+#else
+
+void pVram2RGB_x2_Line_SSE2(Uint32 *src, int xbegin, int xend, int y, int yrep)
+{ /* Fallback in the #else branch of `#if defined(__SSE2__)`: forwards to pVram2RGB_x2_Line().
+   * NOTE(review): the SSE2 entry point of this file is pVram2RGB_x225_Line_SSE2, yet this
+   * fallback is named, and forwards, as x2 -- looks like a copy/paste leftover; confirm.
+   * NOTE(review): `dst` and `x` are not declared in this scope, so this branch cannot
+   * compile as written. */
+ pVram2RGB_x2_Line(src, dst, x, y, yrep);
+}
+
+#endif // __SSE2__
\ No newline at end of file
--- /dev/null
+/*
+ * Zoom x2.5
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+extern void pVram2RGB_x25(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
+
+#if defined(__SSE2__)
+/*
+ * Expand one line of 32-bit pixels horizontally by 2.5 (every 8 source
+ * pixels become 20 output pixels, i.e. five v4hi vectors) and write the
+ * result to the output rows.
+ *
+ *  dst    : first destination vector of the top output row
+ *  src    : first source pixel
+ *  ww     : number of source pixels, consumed in groups of eight
+ *  repeat : number of output rows; <= 0 draws nothing
+ *  pitch  : distance in bytes between two output rows
+ *
+ * With repeat >= 2 and bFullScan off, repeat - 1 rows receive pixel data
+ * and the following row is filled with the black constant.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    const v4ui order3 = (v4ui){3, 3, 4, 4};
+    v4hi expanded[5 * 80];   /* one fully expanded line */
+    v4hi *in = (v4hi *)src;
+    v4hi *out = dst;
+    v4hi lo, hi;
+    int stride;
+    int nvec = 0;
+    int col;
+    int row;
+    int rows;
+    int scanline;
+    int k;
+
+    if(repeat <= 0) return;
+    stride = pitch / sizeof(v4hi);
+    _prefetch_data_write_l1((void *)expanded, sizeof(expanded));
+
+    /* Build the expanded line once; it is the same for every output row. */
+    for(col = 0; col < ww; col += 8) {
+        lo = in[0];
+        hi = in[1];
+        // 76543210 -> 77666554443322211000
+        expanded[nvec + 0].uv = __builtin_ia32_pshufd(lo.uv, 0b01000000);
+        expanded[nvec + 1].uv = __builtin_ia32_pshufd(lo.uv, 0b10101001);
+        expanded[nvec + 2] = (v4hi)__builtin_shuffle(lo.uv, hi.uv, order3);
+        expanded[nvec + 3].uv = __builtin_ia32_pshufd(hi.uv, 0b10010100);
+        expanded[nvec + 4].uv = __builtin_ia32_pshufd(hi.uv, 0b11111010);
+        nvec += 5;
+        in += 2;
+    }
+
+    /* One row is given up to a black scanline unless full-scan is on. */
+    scanline = ((bFullScan) || (repeat < 2)) ? 0 : 1;
+    rows = repeat - scanline;
+    for(row = 0; row < rows; row++) {
+        memcpy((void *)out, (void *)expanded, sizeof(v4hi) * nvec);
+        out = out + stride;
+    }
+    if(scanline) {
+        for(k = 0; k < nvec; k++) out[k].uv = bb;
+    }
+}
+
+
+
+/*
+ * Draw source line `y`, columns [xbegin, xend), scaled 2.5x horizontally
+ * into the buffer at `dst`, writing `yrep` output rows.
+ *
+ * The span is limited to half the draw-surface width and rounded down to a
+ * multiple of 8 pixels. Nothing is drawn when no draw surface is available
+ * or the resulting span is empty.
+ */
+void pVram2RGB_x25_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *Surface = GetDrawSurface();
+    Uint32 *d1;
+    Uint32 *d2;
+    int w;
+    int ww;
+    int x = xbegin;
+
+    if(Surface == NULL) return;
+    w = Surface->w;
+
+    ww = xend - xbegin;
+    if(ww > (w / 2)) ww = w / 2;
+    ww = (ww / 8) * 8;
+    if(ww <= 0) return;
+
+    /*
+     * Scaler_DrawLine() emits 20 output pixels for every 8 source pixels,
+     * so source column x starts at output column (x * 20) / 8. The former
+     * divisor of 16 placed a non-zero xbegin at only 1.25x.
+     */
+    d1 = (Uint32 *)((Uint8 *)dst + ((x * 20) / 8) * Surface->format->BytesPerPixel);
+    d2 = &src[x + y * 640];
+    Scaler_DrawLine((v4hi *)d1, (Uint32 *)d2, ww, yrep, Surface->pitch);
+    return;
+}
+
+
+#else
+
+void pVram2RGB_x25_Line_SSE2(Uint32 *src, int xbegin, int xend, int y, int yrep)
+{ /* Fallback in the #else branch of `#if defined(__SSE2__)`: forwards to pVram2RGB_x25_Line().
+   * NOTE(review): `dst` and `x` are not declared in this scope, and the parameter list
+   * differs from the SSE2 variant (no `Uint8 *dst`), so this branch cannot compile as
+   * written. The extern declared in this file is pVram2RGB_x25, not pVram2RGB_x25_Line --
+   * confirm which generic routine is meant. */
+ pVram2RGB_x25_Line(src, dst, x, y, yrep);
+}
+
+#endif // __SSE2__
\ No newline at end of file
--- /dev/null
+/*
+ * Zoom x2x2
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+extern void pVram2RGB_x2(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
+
+#if defined(__SSE2__)
+// b2p = d0;
+// b2.vv = __builtin_ia32_pshufd(b[0].v, 0x50);
+// b3.vv = __builtin_ia32_pshufd(b[0].v, 0xfa);
+
+// b4.vv = __builtin_ia32_pshufd(b[1].v, 0x50);
+// b5.vv = __builtin_ia32_pshufd(b[1].v, 0xfa);
+
+/*
+ * Double one line of 32-bit pixels horizontally (every 8 source pixels
+ * become 16 output pixels, i.e. four v4hi vectors) and write the result
+ * to the output rows.
+ *
+ *  dst    : first destination vector of the top output row
+ *  src    : first source pixel
+ *  ww     : number of source pixels, consumed in groups of eight
+ *  repeat : number of output rows; <= 0 draws nothing
+ *  pitch  : distance in bytes between two output rows
+ *
+ * With repeat >= 2 and bFullScan off, repeat - 1 rows receive pixel data
+ * and one further row is filled with black.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+
+#ifndef __x86_64__
+    int xx;
+    int yy;
+    v4hi *b2p;
+    volatile v4hi r1, r2;
+    v4hi *b;
+    v4hi bb2;
+    int pitch2;
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    if(repeat <= 0) return;
+    b = (v4hi *)src;
+    bb2.uv = bb;
+    b2p = dst;
+    pitch2 = pitch / sizeof(v4hi);
+    if((bFullScan) || (repeat < 2)) {
+        /* Every row carries pixel data. */
+        volatile v4hi r3, r4, r5, r6;
+        for(xx = 0; xx < ww; xx += 8) {
+            b2p = dst;
+            r1 = *b++;
+            r2 = *b++;
+            r3.vv = __builtin_ia32_pshufd(r1.vv, 0x50);
+            r4.vv = __builtin_ia32_pshufd(r1.vv, 0xfa);
+
+            r5.vv = __builtin_ia32_pshufd(r2.vv, 0x50);
+            r6.vv = __builtin_ia32_pshufd(r2.vv, 0xfa);
+            for(yy = 0; yy < repeat; yy++) {
+                b2p[0] = r3;
+                b2p[1] = r4;
+                b2p[2] = r5;
+                b2p[3] = r6;
+
+                b2p = b2p + pitch2;
+            }
+            dst = dst + 4;
+        }
+    } else {
+        /* repeat - 1 pixel rows followed by one black row. */
+        volatile v4hi r3, r4, r5, r6;
+        for(xx = 0; xx < ww; xx += 8) {
+            b2p = dst;
+            r1 = *b++;
+            r2 = *b++;
+            r3 = r1;
+            r4 = r1;
+            r5 = r2;
+            r6 = r2;
+            r3.vv = __builtin_ia32_pshufd(r1.vv, 0x50);
+            r4.vv = __builtin_ia32_pshufd(r1.vv, 0xfa);
+
+            r5.vv = __builtin_ia32_pshufd(r2.vv, 0x50);
+            r6.vv = __builtin_ia32_pshufd(r2.vv, 0xfa);
+            for(yy = 0; yy < repeat - 1; yy++) {
+                *b2p++ = r3;
+                *b2p++ = r4;
+                *b2p++ = r5;
+                *b2p++ = r6;
+                b2p = b2p + (pitch2 - 4);
+            }
+            *b2p++ = bb2;
+            *b2p++ = bb2;
+            *b2p++ = bb2;
+            *b2p++ = bb2;
+            dst += 4;
+        }
+    }
+#else /* __x86_64__ */
+    int yrep2, yrep3;   /* pixel rows / trailing black rows */
+
+    if(repeat <= 0) return;
+    if((bFullScan) || (repeat < 2)) {
+        yrep2 = repeat;
+        if(yrep2 < 1) yrep2 = 1;
+        yrep3 = 0;
+    } else {
+        yrep2 = repeat - 1;
+        yrep3 = 1;
+    }
+    /*
+     * NOTE(review): the asm moves %rsp and stores below the old stack
+     * pointer while its operands are "rm"; a memory operand addressed via
+     * %rsp, or locals kept below %rsp, could be affected -- confirm.
+     */
+    // 7766554433221100
+    asm ( "/* _dst: .equ 40 */\n\t"
+          "/*_count0: .equ 32 */\n\t"
+          "/*_count1: .equ 24 */\n\t"
+          "/*_count2: .equ 16 */\n\t"
+          "/*_yrep2: .equ 8 */\n\t"
+          "/*_yrep3: .equ 0 */\n\t"
+          "subq $64, %%rsp /* Allocate local value */\n\t"
+          "movq %[src], %%rsi\n\t"
+          "movq %[dst], %%rdi\n\t"
+          "movq %%rdi, 40(%%rsp) /* _dst */\n\t"
+
+          "movl %[pitch], %%eax\n\t"
+          "movq %%rax, %%r10 /* pitch */\n\t"
+
+          "movl %[ww], %%ecx \n\t"
+          "shrl $3, %%ecx\n\t"
+          "movl %%ecx, 32(%%rsp) /* _count0 */\n\t"
+
+          "movl %[rep2], %%r11d\n\t"
+          "movl %[rep3], %%r12d\n\t"
+          "movl %%r12d, 0(%%rsp) /* _yrep3 */\n\t"
+          "cmpl $0, %%r11d\n\t"
+          "je _l2\n\t"
+          "movl %%r11d, 8(%%rsp) /* _yrep2 */\n\t"
+
+          "cmpl $0, %%ecx\n\t"
+          "je _exit0\n\t"
+
+          "_l0: \n\t"
+          "movdqu 0(%%rsi), %%xmm0 /* 0123 */\n\t"
+          "movdqu 16(%%rsi), %%xmm1 /* 4567 */\n\t"
+          "pshufd $0b01010000, %%xmm0, %%xmm2 /* 2233 */\n\t"
+          "pshufd $0b11111010, %%xmm0, %%xmm0 /* 0011 */\n\t"
+          "pshufd $0b01010000, %%xmm1, %%xmm3 /* 6677 */\n\t"
+          "pshufd $0b11111010, %%xmm1, %%xmm1 /* 4455 */\n\t"
+          "addq $32, %%rsi\n\t"
+          "movl %%r11d, %%r13d\n\t"
+          "movq %%rdi, %%r14\n\t"
+          "_l0a: \n\t"
+          "movdqu %%xmm2, 0(%%rdi)\n\t"
+          "movdqu %%xmm0, 16(%%rdi)\n\t"
+          "movdqu %%xmm3, 32(%%rdi)\n\t"
+          "movdqu %%xmm1, 48(%%rdi)\n\t"
+          "addq %%r10, %%rdi\n\t"
+          "decl %%r13d\n\t"
+          "jnz _l0a\n\t"
+          "addq $64, %%r14\n\t"
+          "movq %%r14, %%rdi\n\t"
+          "decl %%ecx\n\t"
+          "jnz _l0\n\t"
+
+          "movl 0(%%rsp), %%ecx /* _yrep3 */\n\t"
+          "cmpl $1, %%ecx\n\t"
+          "jl _exit0\n\t"
+
+          "movq 40(%%rsp), %%rdi /* _dst */\n\t"
+          "movl 8(%%rsp), %%eax /* _yrep2 */\n\t"
+          "mulq %%r10\n\t"
+          "addq %%rax, %%rdi\n\t"
+          "movq %%rdi, %%r14\n\t"
+
+          "movl $0xff000000, %%eax /* ABGR */\n\t"
+          "movd %%eax, %%xmm0\n\t"
+          "pshufd $0b00000000, %%xmm0, %%xmm0\n\t"
+
+          "_l2: \n\t"
+          "movl 32(%%rsp), %%r8d /* _count0 */\n\t"
+          "cmpl $1, %%r8d\n\t"
+          "jl _exit0\n\t"
+
+          "_l2a:\n\t"
+          "movdqu %%xmm0, 0(%%rdi)\n\t"
+          "movdqu %%xmm0, 16(%%rdi)\n\t"
+          "movdqu %%xmm0, 32(%%rdi)\n\t"
+          "movdqu %%xmm0, 48(%%rdi)\n\t"
+          "addq $64, %%rdi\n\t"
+
+          "decl %%r8d\n\t"
+          "jnz _l2a\n\t"
+
+          "movq %%r14, %%rdi\n\t"
+          "addq %%r10, %%rdi\n\t"
+          "movq %%rdi, %%r14\n\t"
+          "decl %%ecx\n\t"
+          "jnz _l2\n\t"
+
+          "_exit0:\n\t"
+          "addq $64, %%rsp /* Free local value */\n\t"
+          :
+          : [src] "rm" (src), [dst] "rm" (dst), [pitch] "rm" (pitch),
+            [ww] "rm" (ww), [rep2] "rm" (yrep2), [rep3] "rm" (yrep3)
+          /*
+           * Clobbers: mulq writes rdx:rax, r8d is the black-row column
+           * counter, and the stores modify memory, so rdx, r8 and "memory"
+           * must be declared along with the other scratch registers.
+           */
+          : "xmm0", "xmm1", "xmm2", "xmm3",
+            "rax", "rcx", "rdx", "rdi", "rsi", "r8",
+            "r10", "r11", "r12", "r13", "r14", "memory" );
+
+#endif
+}
+
+
+
+/*
+ * Draw source line `y`, columns [xbegin, xend), scaled 2x horizontally
+ * into the buffer at `dst`, writing `yrep` output rows.
+ *
+ * The span is limited to half the draw-surface width and rounded down to a
+ * multiple of 8 pixels. Nothing is drawn when no draw surface is available
+ * or the resulting span is empty.
+ */
+void pVram2RGB_x2_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint32 *out;
+    Uint32 *in;
+    int surface_w;
+    int span;
+
+    if(surface == NULL) return;
+    surface_w = surface->w;
+
+    span = xend - xbegin;
+    if(span > (surface_w / 2)) span = surface_w / 2;
+    span = (span / 8) * 8;
+    if(span <= 0) return;
+
+    /* Two output pixels per source pixel. */
+    out = (Uint32 *)((Uint8 *)dst + xbegin * 2 * surface->format->BytesPerPixel);
+    in = &src[xbegin + y * 640];
+    Scaler_DrawLine((v4hi *)out, in, span, yrep, surface->pitch);
+}
+
+
+#else
+
+void pVram2RGB_x2_Line_SSE2(Uint32 *src, int xbegin, int xend, int y, int yrep)
+{ /* Fallback in the #else branch of `#if defined(__SSE2__)`: forwards to pVram2RGB_x2_Line().
+   * NOTE(review): `dst` and `x` are not declared in this scope, and the parameter list
+   * differs from the SSE2 variant (no `Uint8 *dst`), so this branch cannot compile as
+   * written. The extern declared in this file is pVram2RGB_x2, not pVram2RGB_x2_Line --
+   * confirm which generic routine is meant. */
+ pVram2RGB_x2_Line(src, dst, x, y, yrep);
+}
+
+#endif // __SSE2__
\ No newline at end of file
--- /dev/null
+/*
+ * Zoom x3
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+extern void pVram2RGB_x3_Line(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
+
+#if defined(__SSE2__)
+/*
+ * Triple one line of 32-bit pixels horizontally (every 8 source pixels
+ * become 24 output pixels, i.e. six v4hi vectors) and write the result to
+ * the output rows.
+ *
+ *  dst    : first destination vector of the top output row
+ *  src    : first source pixel
+ *  ww     : number of source pixels, consumed in groups of eight
+ *  repeat : number of output rows; <= 0 draws nothing
+ *  pitch  : distance in bytes between two output rows
+ *
+ * With repeat >= 2 and bFullScan off, repeat - 1 rows receive pixel data
+ * and the following row is filled with the black constant.
+ */
+static inline void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    v4hi tile[6];   /* the 24 output pixels of one 8-pixel group */
+    v4hi lo, hi;
+    v4hi *in;
+    v4hi *out;
+    int stride;
+    int copies;
+    int blanks;
+    int col;
+    int row;
+    int k;
+
+    if(repeat <= 0) return;
+    blanks = ((bFullScan) || (repeat < 2)) ? 0 : 1;
+    copies = repeat - blanks;
+
+    in = (v4hi *)src;
+    stride = pitch / sizeof(v4hi);
+    for(col = 0; col < ww; col += 8) {
+        lo = *in++;
+        hi = *in++;
+        /* Each source pixel appears three times in a row. */
+        tile[0].uv = (v4ui){lo.i[0], lo.i[0], lo.i[0], lo.i[1]};
+        tile[1].uv = (v4ui){lo.i[1], lo.i[1], lo.i[2], lo.i[2]};
+        tile[2].uv = (v4ui){lo.i[2], lo.i[3], lo.i[3], lo.i[3]};
+
+        tile[3].uv = (v4ui){hi.i[0], hi.i[0], hi.i[0], hi.i[1]};
+        tile[4].uv = (v4ui){hi.i[1], hi.i[1], hi.i[2], hi.i[2]};
+        tile[5].uv = (v4ui){hi.i[2], hi.i[3], hi.i[3], hi.i[3]};
+
+        out = dst;
+        for(row = 0; row < copies; row++) {
+            for(k = 0; k < 6; k++) out[k] = tile[k];
+            out = out + stride;
+        }
+        for(row = 0; row < blanks; row++) {
+            for(k = 0; k < 6; k++) out[k].uv = bb;
+            out = out + stride;
+        }
+        dst += 6;
+    }
+}
+
+
+
+
+/*
+ * Draw source line `y`, columns [xbegin, xend), scaled 3x horizontally
+ * into the buffer at `dst`, writing `yrep` output rows.
+ *
+ * The span is rounded down to a multiple of 8 pixels. Nothing is drawn
+ * when no draw surface is available or the resulting span is empty.
+ */
+void pVram2RGB_x3_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint32 *out;
+    Uint32 *in;
+    int span;
+
+    if(surface == NULL) return;
+
+    span = ((xend - xbegin) / 8) * 8;
+    if(span <= 0) return;
+
+    /* Three output pixels per source pixel. */
+    out = (Uint32 *)((Uint8 *)dst + xbegin * 3 * surface->format->BytesPerPixel);
+    in = &src[xbegin + y * 640];
+    Scaler_DrawLine((v4hi *)out, in, span, yrep, surface->pitch);
+}
+
+
+#else
+
+void pVram2RGB_x3_Line_SSE2(Uint32 *src, int xbegin, int xend, int y, int yrep)
+{ /* Fallback in the #else branch of `#if defined(__SSE2__)`: forwards to pVram2RGB_x3_Line().
+   * NOTE(review): `dst` and `x` are not declared in this scope, and the parameter list
+   * differs from the SSE2 variant (no `Uint8 *dst`), so this branch cannot compile as
+   * written -- confirm the intended signature. */
+ pVram2RGB_x3_Line(src, dst, x, y, yrep);
+}
+
+#endif // __SSE2__
\ No newline at end of file
--- /dev/null
+/*
+ * Zoom x4.5
+ * (C) 2014 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+extern void pVram2RGB_x45_Line(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
+
+#if defined(__SSE2__)
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{ /*
+   * Expand one line of 32-bit pixels horizontally by 4.5 (every 8 source pixels
+   * become 36 output pixels, i.e. nine v4hi vectors / 144 bytes) and write the
+   * result to the output rows.
+   *   dst    : first destination vector of the top output row
+   *   src    : first source pixel
+   *   ww     : number of source pixels, consumed in groups of eight
+   *   repeat : number of output rows; <= 0 draws nothing
+   *   pitch  : distance in bytes between two output rows
+   * With repeat >= 2 and bFullScan off, repeat - 1 rows receive pixel data and
+   * one row is filled with black. Non-x86_64 builds use the C path, x86_64
+   * builds the inline assembly below.
+   */
+ int xx;
+ int yy;
+ int yrep2; /* rows that receive pixel data (x86_64 path) */
+ int yrep3; /* number of black rows, 0 or 1 (x86_64 path) */
+ int blank;
+ if(repeat <= 0) return;
+
+# ifndef __x86_64__
+ v4hi *b2p;
+ v4hi r1, r2;
+ v4hi *d0;
+ v4hi *b;
+ int pitch2;
+#if AG_BIG_ENDIAN != 1
+ const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+ const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+ b = (v4hi *)src;
+ b2p = dst;
+ pitch2 = pitch / sizeof(v4hi); /* row stride in v4hi units */
+ if((bFullScan) || (repeat < 2)) { /* every row carries pixel data */
+ v4hi r3, r4, r5, r6, r7;
+ v4hi r8, r9, r10, r11;
+ for(xx = 0; xx < ww; xx += 8) {
+ b2p = dst;
+ r1 = *b++;
+ r2 = *b++;
+ // 76543210 -> 777766666555544444333322222111100000
+ r3.uv = (v4ui){r1.i[0], r1.i[0], r1.i[0], r1.i[0]};
+ r4.uv = (v4ui){r1.i[0], r1.i[1], r1.i[1], r1.i[1]};
+ r5.uv = (v4ui){r1.i[1], r1.i[2], r1.i[2], r1.i[2]};
+ r6.uv = (v4ui){r1.i[2], r1.i[2], r1.i[3], r1.i[3]};
+ r7.uv = (v4ui){r1.i[3], r1.i[3], r2.i[0], r2.i[0]};
+
+ r8.uv = (v4ui){r2.i[0], r2.i[0], r2.i[0], r2.i[1]};
+ r9.uv = (v4ui){r2.i[1], r2.i[1], r2.i[1], r2.i[2]};
+ r10.uv = (v4ui){r2.i[2], r2.i[2], r2.i[2], r2.i[2]};
+ r11.uv = (v4ui){r2.i[3], r2.i[3], r2.i[3], r2.i[3]};
+ for(yy = 0; yy < repeat; yy++) {
+ b2p[0] = r3;
+ b2p[1] = r4;
+ b2p[2] = r5;
+ b2p[3] = r6;
+ b2p[4] = r7;
+ b2p[5] = r8;
+ b2p[6] = r9;
+ b2p[7] = r10;
+ b2p[8] = r11;
+ b2p = b2p + pitch2;
+ }
+ dst += 9; /* next 36-pixel output group */
+// b += 2;
+ }
+ } else { /* repeat - 1 pixel rows, then one black row */
+ v4hi r3, r4, r5, r6, r7;
+ v4hi r8, r9, r10, r11;
+ for(xx = 0; xx < ww; xx += 8) {
+ b2p = dst;
+ r1 = *b++;
+ r2 = *b++;
+ // 76543210 -> 777766666555544444333322222111100000
+ r3.uv = (v4ui){r1.i[0], r1.i[0], r1.i[0], r1.i[0]};
+ r4.uv = (v4ui){r1.i[0], r1.i[1], r1.i[1], r1.i[1]};
+ r5.uv = (v4ui){r1.i[1], r1.i[2], r1.i[2], r1.i[2]};
+ r6.uv = (v4ui){r1.i[2], r1.i[2], r1.i[3], r1.i[3]};
+ r7.uv = (v4ui){r1.i[3], r1.i[3], r2.i[0], r2.i[0]};
+
+ r8.uv = (v4ui){r2.i[0], r2.i[0], r2.i[0], r2.i[1]};
+ r9.uv = (v4ui){r2.i[1], r2.i[1], r2.i[1], r2.i[2]};
+ r10.uv = (v4ui){r2.i[2], r2.i[2], r2.i[2], r2.i[2]};
+ r11.uv = (v4ui){r2.i[3], r2.i[3], r2.i[3], r2.i[3]};
+ for(yy = 0; yy < repeat - 1; yy++) {
+ b2p[0] = r3;
+ b2p[1] = r4;
+ b2p[2] = r5;
+ b2p[3] = r6;
+ b2p[4] = r7;
+ b2p[5] = r8;
+ b2p[6] = r9;
+ b2p[7] = r10;
+ b2p[8] = r11;
+ b2p = b2p + pitch2;
+ }
+ b2p[0].uv = /* black row directly below the pixel rows */
+ b2p[1].uv =
+ b2p[2].uv =
+ b2p[3].uv =
+ b2p[4].uv =
+ b2p[5].uv =
+ b2p[6].uv =
+ b2p[7].uv =
+ b2p[8].uv = bb;
+ dst += 9;
+// b += 2;
+ }
+ }
+#else // __x86_64__
+
+ if((bFullScan) || (repeat < 2)) {
+ yrep2 = repeat;
+ if(yrep2 < 1) yrep2 = 1;
+ yrep3 = 0;
+ } else {
+ yrep2 = repeat - 1;
+ yrep3 = 1;
+ }
+ // 76543210 -> 7777 6666 6555 5444 4433 3322 2221 1110 0000
+ /* NOTE(review): the asm below moves %rsp (subq/pushq) while its operands are "rm";
+  * a memory operand addressed via %rsp, or locals kept below %rsp, could be affected,
+  * and no "memory" clobber is listed although the asm stores through %rdi -- confirm. */
+ asm (
+ "subq $64, %%rsp /* Allocate local value */\n\t"
+ "movq %[src], %%rsi\n\t"
+ "movq %[dst], %%rdi\n\t"
+ "movl %[pitch], %%eax\n\t"
+ "movq %%rax, %%r10 /* pitch */\n\t"
+ "movq %%rdi, 40(%%rsp) /* dst */\n\t"
+ "movl %[ww], %%ecx\n\t"
+ "shr $3, %%ecx\n\t"
+ "movl %%ecx, 32(%%rsp) /* r10 / 16(rsp) = count(ww) */\n\t"
+ "movl %%ecx, 24(%%rsp) /* r10 / 16(rsp) = count(ww) */\n\t"
+ "movl %%ecx, 16(%%rsp) /* r10 / 16(rsp) = count(ww) */\n\t"
+ "movl %[rep2], %%ecx \n\t"
+ "movl %%ecx, 8(%%rsp) /* r9 / 8(rsp) = yrep2 */\n\t"
+ "movl %[rep3], %%ecx \n\t"
+ "movl %%ecx, 0(%%rsp) /* r8 / 0(rsp) = yrep3 */\n\t"
+ "movq $0, %%r11 /* Set offset counter of source */\n\t"
+ "movl 32(%%rsp), %%ecx \n\t"
+ "_l0:\n\t" /* outer loop: one group of 8 source pixels per pass */
+ "movl %%ecx, 16(%%rsp) \n\t"
+ "/* Get upper to xmm0 */\n\t"
+ "movdqu 0(%%rsi), %%xmm0 /* Get Upper */\n\t"
+ "movdqu 16(%%rsi), %%xmm5 /* Get Lower */\n\t"
+ "/* 76543210 -> 7777 6666 6555 5444 4433 3322 2221 1110 0000 */"
+ "pshufd $0b11111111, %%xmm0, %%xmm1 /* 7777 -> xmm1 */\n\t"
+ "pshufd $0b11101010, %%xmm0, %%xmm2 /* 6666 -> xmm2 */\n\t"
+ "pshufd $0b10100101, %%xmm0, %%xmm3 /* 6555 -> xmm3 */\n\t"
+ "pshufd $0b01010000, %%xmm0, %%xmm4 /* 5444 -> xmm4 */\n\t"
+ "addq $32, %%rsi\n\t"
+ "movd %%xmm1, %%eax /* $00,$00,0,4 */\n\t"
+ "movd %%eax, %%xmm6\n\t"
+ "pshufd $0b11110000, %%xmm6, %%xmm6\n\t"
+ "/* Store higher */\n\t"
+ "movq $0, %%r13 /* r13 -> offset */\n\t"
+ "movl 8(%%rsp), %%ecx /* yrep2 */\n\t"
+ "pushq %%rdi\n\t"
+ "_l1a:\n\t" /* first 64 bytes of the group, repeated for yrep2 rows */
+ "movdqu %%xmm4, 0(%%rdi) /* store 6666 */\n\t"
+ "movdqu %%xmm3, 16(%%rdi) /* store 6555 */\n\t"
+ "movdqu %%xmm2, 32(%%rdi) /* store 5444 */\n\t"
+ "movdqu %%xmm1, 48(%%rdi) /* store 5444 */\n\t"
+ "addq %%r10, %%rdi\n\t"
+ "dec %%ecx\n\t"
+ "jnz _l1a\n\t"
+ "popq %%rdi\n\t"
+ "pshufd $0b11111111, %%xmm5, %%xmm1 /* 3322 */\n\t"
+ "pshufd $0b11101010, %%xmm5, %%xmm2 /* 2221 */\n\t"
+ "pshufd $0b10100101, %%xmm5, %%xmm3 /* 1110 */\n\t"
+ "pshufd $0b01010000, %%xmm5, %%xmm4 /* 0000 */\n\t"
+ "movd %%xmm4, %%eax\n\t"
+ "movd %%eax, %%xmm0\n\t"
+ "pshufd $0b00001111, %%xmm0, %%xmm0\n\t"
+ "por %%xmm0, %%xmm6\n\t"
+ "movl 8(%%rsp), %%ecx\n\t"
+ "movq $0, %%r13 /* r13 -> offset */\n\t"
+ "pushq %%rdi\n\t"
+ "_l1b:\n\t" /* remaining 80 bytes of the group, repeated for yrep2 rows */
+ "movdqu %%xmm6, 64(%%rdi) /* store 3322 */\n\t"
+ "movdqu %%xmm4, 80(%%rdi) /* store 3322 */\n\t"
+ "movdqu %%xmm3, 96(%%rdi) /* store 2221 */\n\t"
+ "movdqu %%xmm2, 112(%%rdi) /* store 1110 */\n\t"
+ "movdqu %%xmm1, 128(%%rdi) /* store 0000 */\n\t"
+ "addq %%r10, %%rdi\n\t"
+ "dec %%ecx\n\t"
+ "jnz _l1b\n\t"
+ "popq %%rdi\n\t"
+ "addq $144, %%rdi\n\t"
+ "addq $4, %%r11\n\t"
+ "movl 16(%%rsp), %%ecx\n\t"
+ "dec %%ecx\n\t"
+ "jnz _l0\n\t"
+
+ "movl 0(%%rsp), %%ebx\n\t"
+ "cmpl $0, %%ebx /* cmp yrep3, 0 */\n\t"
+ "jz _l2c\n\t"
+
+ "/* clear */"
+ "movl $0xff000000, %%eax\n\t"
+ "movd %%eax, %%xmm0\n\t"
+ "pshufd $0b00000000, %%xmm0, %%xmm0\n\t"
+ "_l2a:\n\t"
+ "movq 40(%%rsp), %%rdi\n\t" /* NOTE(review): reloads the saved dst (top row); the pitch is only added to r13 below, never to rdi, so the black row appears to land on the first row rather than after the yrep2 pixel rows -- confirm */
+ "movl 32(%%rsp), %%ecx\n\t"
+ "pushq %%rdi\n\t"
+ "_l2b:\n\t"
+ "movdqu %%xmm0, 0(%%rdi) /* store 3322 */\n\t"
+ "movdqu %%xmm0, 16(%%rdi) /* store 3322 */\n\t"
+ "movdqu %%xmm0, 32(%%rdi) /* store 3322 */\n\t"
+ "movdqu %%xmm0, 48(%%rdi) /* store 3322 */\n\t"
+ "movdqu %%xmm0, 64(%%rdi) /* store 3322 */\n\t"
+ "movdqu %%xmm0, 80(%%rdi) /* store 3322 */\n\t"
+ "movdqu %%xmm0, 96(%%rdi) /* store 3322 */\n\t"
+ "movdqu %%xmm0, 112(%%rdi) /* store 3322 */\n\t"
+ "movdqu %%xmm0, 128(%%rdi) /* store 3322 */\n\t"
+ "addq $144, %%rdi\n\t"
+ "dec %%ecx\n\t"
+ "jnz _l2b\n\t"
+ "popq %%rdi\n\t"
+ "addq %%r10, %%r13\n\t"
+ "dec %%ebx\n\t"
+ "jnz _l2a\n\t"
+ "_l2c:\n\t"
+ "addq $64, %%rsp"
+ :
+ : [src] "rm" (src), [dst] "rm" (dst), [pitch] "rm" (pitch),
+ [ww] "rm" (ww), [rep2] "rm" (yrep2), [rep3] "rm" (yrep3)
+ : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "rax", "rbx", "rcx", "rdi", "rsi", "r10", "r11", "r12", "r13" );
+#endif
+}
+
+
+
+/*
+ * Draw source line `y`, columns [xbegin, xend), scaled 4.5x horizontally
+ * into the buffer at `dst`, writing `yrep` output rows.
+ *
+ * The span is rounded down to a multiple of 8 pixels. Nothing is drawn
+ * when no draw surface is available or the resulting span is empty.
+ */
+void pVram2RGB_x45_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    Uint32 *out;
+    Uint32 *in;
+    int span;
+
+    if(surface == NULL) return;
+
+    span = ((xend - xbegin) / 8) * 8;
+    if(span <= 0) return;
+
+    /* 18 output pixels for every 4 source pixels. */
+    out = (Uint32 *)((Uint8 *)dst + ((xbegin * 18) / 4) * surface->format->BytesPerPixel);
+    in = &src[xbegin + y * 640];
+    Scaler_DrawLine((v4hi *)out, in, span, yrep, surface->pitch);
+}
+
+
+#else
+
+void pVram2RGB_x45_Line_SSE2(Uint32 *src, int xbegin, int xend, int y, int yrep)
+{ /* Fallback in the #else branch of `#if defined(__SSE2__)`: forwards to pVram2RGB_x45_Line().
+   * NOTE(review): `dst` and `x` are not declared in this scope, and the parameter list
+   * differs from the SSE2 variant (no `Uint8 *dst`), so this branch cannot compile as
+   * written -- confirm the intended signature. */
+ pVram2RGB_x45_Line(src, dst, x, y, yrep);
+}
+
+#endif // __SSE2__
\ No newline at end of file
--- /dev/null
+/*
+ * Zoom x4x4
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-01-26 Move from agar_sdlscaler.cpp
+ * 2013-09-17 Move from scaler/generic/scaler_x4.c
+ */
+
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+#if defined(__SSE2__)
+/*
+ * Draw source line `y`, columns [xbegin, xend), scaled 4x horizontally
+ * into the buffer at `dst`, writing `yrep` output rows.
+ *
+ * Every source pixel is replicated into one v4hi vector; a group of eight
+ * source pixels therefore fills eight vectors (32 output pixels).
+ * The span is limited to a quarter of the draw-surface width.
+ * yrep of 0 or 1 writes a single row. For larger values, the last row is
+ * filled with black unless bFullScan is set.
+ * Nothing is drawn when no draw surface is available or the span is empty.
+ */
+void pVram2RGB_x4_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surface = GetDrawSurface();
+    v4hi quad[8];      /* expanded output of one 8-pixel group */
+    v4hi blackvec;
+    v4hi in_lo, in_hi;
+    v4hi *in;
+    v4hi *outv;
+    Uint32 *out;
+    Uint32 *column;
+    Uint32 black;
+    unsigned stride;   /* row stride in Uint32 units */
+    int surface_w;
+    int span;
+    int col;
+    int row;
+    int k;
+
+    if(surface == NULL) return;
+    surface_w = surface->w;
+
+    span = xend - xbegin;
+    if(span > (surface_w / 4)) span = surface_w / 4;
+    span = span - 7;
+    if(span <= 0) return;
+
+#if AG_BIG_ENDIAN != 1
+    black = 0xff000000;
+#else
+    black = 0x000000ff;
+#endif
+    out = (Uint32 *)(dst+ xbegin * 4 * surface->format->BytesPerPixel);
+    in = (v4hi *)&src[xbegin + y * 640];
+    stride = surface->pitch / sizeof(Uint32);
+    blackvec.i[0] = blackvec.i[1] = blackvec.i[2] = blackvec.i[3] = black;
+
+    // Not thinking align ;-(
+    _prefetch_data_write_l2(out, sizeof(v4hi) * 8 * span);
+
+    if((yrep == 0) || (yrep == 1)) {
+        /* Single output row. */
+        for(col = 0; col < span; col += 8) {
+            in_lo = in[0];
+            in_hi = in[1];
+            quad[0].vv = __builtin_ia32_pshufd(in_lo.vv, 0x00);
+            quad[1].vv = __builtin_ia32_pshufd(in_lo.vv, 0x55);
+            quad[2].vv = __builtin_ia32_pshufd(in_lo.vv, 0xaa);
+            quad[3].vv = __builtin_ia32_pshufd(in_lo.vv, 0xff);
+
+            quad[4].vv = __builtin_ia32_pshufd(in_hi.vv, 0x00);
+            quad[5].vv = __builtin_ia32_pshufd(in_hi.vv, 0x55);
+            quad[6].vv = __builtin_ia32_pshufd(in_hi.vv, 0xaa);
+            quad[7].vv = __builtin_ia32_pshufd(in_hi.vv, 0xff);
+
+            outv = (v4hi *)out;
+            for(k = 0; k < 8; k++) outv[k] = quad[k];
+            out += 32;
+            in += 2;
+        }
+        return;
+    }
+
+    /* Several rows: walk each 32-pixel column downwards. */
+    column = out;
+    for(col = 0; col < span; col += 8) {
+        in_lo = in[0];
+        in_hi = in[1];
+        quad[0].vv = __builtin_ia32_pshufd(in_lo.vv, 0x00);
+        quad[1].vv = __builtin_ia32_pshufd(in_lo.vv, 0x55);
+        quad[2].vv = __builtin_ia32_pshufd(in_lo.vv, 0xaa);
+        quad[3].vv = __builtin_ia32_pshufd(in_lo.vv, 0xff);
+
+        quad[4].vv = __builtin_ia32_pshufd(in_hi.vv, 0x00);
+        quad[5].vv = __builtin_ia32_pshufd(in_hi.vv, 0x55);
+        quad[6].vv = __builtin_ia32_pshufd(in_hi.vv, 0xaa);
+        quad[7].vv = __builtin_ia32_pshufd(in_hi.vv, 0xff);
+
+        out = column;
+        for(row = 0; row < yrep; row++) {
+            outv = (v4hi *)out;
+            _prefetch_data_write_l2(out, sizeof(v4hi) * 8);
+            if(!bFullScan && (row >= (yrep - 1))) {
+                /* Last row becomes the black scanline. */
+                for(k = 0; k < 8; k++) outv[k] = blackvec;
+            } else {
+                for(k = 0; k < 8; k++) outv[k] = quad[k];
+            }
+            out += stride;
+        }
+        column += 32;
+        in += 2;
+    }
+}
+
+
+#else // NON-SSE2
+/*
+ * Non-SSE2 build: hand the request on to the generic x4 line scaler.
+ * The call previously spelled type names inside the argument list
+ * ("Uint32 *src", "int y"), which is not a valid call expression.
+ *
+ * NOTE(review): the SSE2 variant in this file is named
+ * pVram2RGB_x4_Line_SSE2 and takes an extra `Uint8 *dst` and an int yrep;
+ * confirm which name and signature the callers expect.
+ */
+void pVram2RGB_x4_SSE2_Line(Uint32 *src, int xbegin, int xend, int y, float yrep)
+{
+    pVram2RGB_x4_Line(src, xbegin, xend, y, yrep);
+}
+#endif
\ No newline at end of file
--- /dev/null
+/*
+ * Zoom x5
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+extern void pVram2RGB_x5_Line(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
+
+#if defined(__SSE2__)
+/*
+ * Expand one line of 32-bit pixels horizontally by 5 (every 8 source
+ * pixels become 40 output pixels, i.e. ten v4hi vectors) and write the
+ * result to the output rows.
+ *
+ *  dst    : first destination vector of the top output row
+ *  src    : first source pixel
+ *  ww     : number of source pixels, consumed in groups of eight
+ *  repeat : number of output rows; <= 0 draws nothing
+ *  pitch  : distance in bytes between two output rows
+ *
+ * With repeat >= 2 and bFullScan off, repeat - 1 rows receive pixel data
+ * and the following row is filled with the black constant.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+#if AG_BIG_ENDIAN != 1
+    v4ui bb2 = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    v4ui bb2 = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+    v4hi expanded[10 * 80];   /* one fully expanded line */
+    v4hi *in = (v4hi *)src;
+    v4hi *out = dst;
+    v4hi lo, hi;
+    int stride;
+    int nvec = 0;
+    int col;
+    int row;
+    int rows;
+    int scanline;
+    int k;
+
+    if(repeat <= 0) return;
+    stride = pitch / sizeof(v4hi);
+    _prefetch_data_write_l1(expanded, sizeof(expanded));
+
+    /* Build the expanded line once; it is the same for every output row. */
+    for(col = 0; col < ww; col += 8) {
+        lo = in[0];
+        hi = in[1];
+        expanded[nvec + 0].uv = __builtin_ia32_pshufd(lo.uv, 0b00000000); // 0000
+        expanded[nvec + 1].uv = __builtin_ia32_pshufd(lo.uv, 0b01010100); // 0111
+        expanded[nvec + 2].uv = __builtin_ia32_pshufd(lo.uv, 0b10100101); // 1122
+        expanded[nvec + 3].uv = __builtin_ia32_pshufd(lo.uv, 0b11101010); // 2223
+        expanded[nvec + 4].uv = __builtin_ia32_pshufd(lo.uv, 0b11111111); // 3333
+
+        expanded[nvec + 5].uv = __builtin_ia32_pshufd(hi.uv, 0b00000000); // 0000
+        expanded[nvec + 6].uv = __builtin_ia32_pshufd(hi.uv, 0b01010100); // 0111
+        expanded[nvec + 7].uv = __builtin_ia32_pshufd(hi.uv, 0b10100101); // 1122
+        expanded[nvec + 8].uv = __builtin_ia32_pshufd(hi.uv, 0b11101010); // 2223
+        expanded[nvec + 9].uv = __builtin_ia32_pshufd(hi.uv, 0b11111111); // 3333
+
+        nvec += 10;
+        in += 2;
+    }
+    _prefetch_data_read_l1(expanded, sizeof(expanded));
+
+    /* One row is given up to a black scanline unless full-scan is on. */
+    scanline = ((bFullScan) || (repeat < 2)) ? 0 : 1;
+    rows = repeat - scanline;
+    for(row = 0; row < rows; row++) {
+        memcpy(out, expanded, nvec * sizeof(v4hi));
+        out = out + stride;
+    }
+    if(scanline) {
+        for(k = 0; k < nvec; k++) out[k].uv = bb2;
+    }
+}
+
+
+
+/*
+ * Draw source row 'y', columns [xbegin, xend), stretched x5 horizontally.
+ *
+ * src    : source pixel buffer, indexed as x + y * 640
+ * dst    : start of the destination row(s); the column offset is added here
+ * yrep   : number of destination rows handed to Scaler_DrawLine
+ *
+ * Returns without drawing when no draw surface is available or when the
+ * span holds fewer than 8 pixels (the width is rounded down to a multiple
+ * of 8).
+ */
+void pVram2RGB_x5_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surf = GetDrawSurface();
+    Uint8 *out;
+    Uint32 *in;
+    int width;
+
+    if(surf == NULL) return;
+
+    /* Only whole groups of 8 source pixels are converted. */
+    width = ((xend - xbegin) / 8) * 8;
+    if(width <= 0) return;
+
+    out = (Uint8 *)dst + xbegin * 5 * surf->format->BytesPerPixel;
+    in = &src[xbegin + y * 640];
+    Scaler_DrawLine((v4hi *)out, in, width, yrep, surf->pitch);
+}
+
+
+#else
+
+/*
+ * Build without SSE2: forward to the generic x5 line scaler.
+ *
+ * The parameter list matches the SSE2 implementation so callers are the
+ * same in both builds. xend is not forwarded because pVram2RGB_x5_Line
+ * takes a single x argument.
+ * NOTE(review): assumes that x argument is the starting column - confirm
+ * against the definition of pVram2RGB_x5_Line.
+ */
+void pVram2RGB_x5_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    (void)xend;
+    pVram2RGB_x5_Line(src, (Uint32 *)dst, xbegin, y, yrep);
+}
+
+#endif // __SSE2__
\ No newline at end of file
--- /dev/null
+/*
+ * Zoom x2x2
+ * (C) 2013 K.Ohta
+ *
+ * History:
+ * 2013-04-02 Move from scaler_x2.c
+ */
+#include "agar_sdlview.h"
+#include "api_vram.h"
+#include "api_draw.h"
+#include "sdl_cpuid.h"
+#include "cache_wrapper.h"
+
+extern struct XM7_CPUID *pCpuID;
+
+extern void pVram2RGB_x6_Line(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
+
+#if defined(__SSE2__)
+/*
+ * Stretch one source line horizontally by 6 and replicate it vertically.
+ *
+ * dst    : first destination vector of the top output row
+ * src    : source pixels, read as v4hi vectors (two vectors per 8 pixels)
+ * ww     : number of source pixels to convert, consumed 8 at a time
+ * repeat : number of destination rows to fill; nothing is drawn if <= 0
+ * pitch  : destination row stride in bytes
+ *
+ * Each group of 8 source pixels yields 12 destination vectors. When
+ * bFullScan is set or repeat < 2, all 'repeat' rows receive the stretched
+ * pixels. Otherwise the first repeat - 1 rows receive them and the last
+ * row is filled with bb.
+ */
+static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
+{
+    v4hi px[12];     /* stretched output for the current 8-pixel group */
+    v4hi s1, s2;
+    v4hi *in;
+    v4hi *out;
+    int xx;
+    int yy;
+    int k;
+    int rows;        /* rows that receive picture data */
+    int blackline;   /* non-zero when the last row is filled with bb */
+    int pitch2;      /* destination row stride in v4hi units */
+#if AG_BIG_ENDIAN != 1
+    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
+#else
+    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
+#endif
+
+    if(repeat <= 0) return;
+    in = (v4hi *)src;
+    pitch2 = pitch / sizeof(v4hi);
+    blackline = !((bFullScan) || (repeat < 2));
+    rows = blackline ? (repeat - 1) : repeat;
+
+    for(xx = 0; xx < ww; xx += 8) {
+        s1 = *in++;
+        s2 = *in++;
+
+        px[0].uv  = (v4ui){s1.i[0], s1.i[0], s1.i[0], s1.i[0]};
+        px[1].uv  = (v4ui){s1.i[0], s1.i[0], s1.i[1], s1.i[1]};
+        px[2].uv  = (v4ui){s1.i[1], s1.i[1], s1.i[1], s1.i[1]};
+        px[3].uv  = (v4ui){s1.i[2], s1.i[2], s1.i[2], s1.i[2]};
+        px[4].uv  = (v4ui){s1.i[2], s1.i[2], s1.i[3], s1.i[3]};
+        px[5].uv  = (v4ui){s1.i[3], s1.i[3], s1.i[3], s1.i[3]};
+
+        px[6].uv  = (v4ui){s2.i[0], s2.i[0], s2.i[0], s2.i[0]};
+        px[7].uv  = (v4ui){s2.i[0], s2.i[0], s2.i[1], s2.i[1]};
+        px[8].uv  = (v4ui){s2.i[1], s2.i[1], s2.i[1], s2.i[1]};
+        px[9].uv  = (v4ui){s2.i[2], s2.i[2], s2.i[2], s2.i[2]};
+        px[10].uv = (v4ui){s2.i[2], s2.i[2], s2.i[3], s2.i[3]};
+        px[11].uv = (v4ui){s2.i[3], s2.i[3], s2.i[3], s2.i[3]};
+
+        out = dst;
+        for(yy = 0; yy < rows; yy++) {
+            for(k = 0; k < 12; k++) out[k] = px[k];
+            out = out + pitch2;
+        }
+        if(blackline) {
+            for(k = 0; k < 12; k++) out[k].uv = bb;
+        }
+        /*
+         * 12 vectors were written for this group, so step by 12 in both
+         * modes; a smaller step would make the next group overwrite the
+         * tail of this one.
+         */
+        dst += 12;
+    }
+}
+
+
+
+/*
+ * Draw source row 'y', columns [xbegin, xend), stretched x6 horizontally.
+ *
+ * src    : source pixel buffer, indexed as x + y * 640
+ * dst    : start of the destination row(s); the column offset is added here
+ * yrep   : number of destination rows handed to Scaler_DrawLine
+ *
+ * Returns without drawing when no draw surface is available or when the
+ * span holds fewer than 8 pixels (the width is rounded down to a multiple
+ * of 8).
+ */
+void pVram2RGB_x6_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    AG_Surface *surf = GetDrawSurface();
+    Uint8 *out;
+    Uint32 *in;
+    int width;
+
+    if(surf == NULL) return;
+
+    /* Only whole groups of 8 source pixels are converted. */
+    width = ((xend - xbegin) / 8) * 8;
+    if(width <= 0) return;
+
+    out = (Uint8 *)dst + xbegin * 6 * surf->format->BytesPerPixel;
+    in = &src[xbegin + y * 640];
+    Scaler_DrawLine((v4hi *)out, in, width, yrep, surf->pitch);
+}
+
+
+#else
+
+/*
+ * Build without SSE2: forward to the generic x6 line scaler.
+ *
+ * The parameter list matches the SSE2 implementation so callers are the
+ * same in both builds. xend is not forwarded because pVram2RGB_x6_Line
+ * takes a single x argument.
+ * NOTE(review): assumes that x argument is the starting column - confirm
+ * against the definition of pVram2RGB_x6_Line.
+ */
+void pVram2RGB_x6_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
+{
+    (void)xend;
+    pVram2RGB_x6_Line(src, (Uint32 *)dst, xbegin, y, yrep);
+}
+
+#endif // __SSE2__
\ No newline at end of file
DWORD dwDataLength;
} wavheader_t;
+
void EMU::AudioCallbackSDL(void *udata, Uint8 *stream, int len)
{
int pos;
// secondary buffer
uBufSize = (100 * SndSpecPresented.freq * SndSpecPresented.channels * 2) / 1000;
- pSoundBuf = malloc(uBufSize * sizeof(sint16_t));
+ pSoundBuf = malloc(uBufSize * sizeof(Sint16));
if(pSoundBuf == NULL) {
SDL_CloseAudio();
return;
return;
}
+ ZeroMemory(pSoundBuf, uBufSize * sizeof(Sint16));
sound_ok = first_half = true;
}
if(sound_ok) {
DWORD play_c, offset, size1, size2;
- sint16_t *ptr1, *ptr2;
+ Sint16 *ptr1, *ptr2;
// start play
if(!sound_started) {
}
SDL_LockAudio();
// check current position
- play_c = nSndWritePos * sizeof(sint16_t);
+ play_c = nSndWritePos * sizeof(Sint16);
if(first_half) {
if(play_c < (uBufSize / 2)) {
SDL_UnlockAudio();
SDL_UnlockAudio();
// sound buffer must be updated
- uint16* sound_buffer = vm->create_sound(extra_frames);
+ uint16_t* sound_buffer = (uint16_t)vm->create_sound(extra_frames);
if(now_rec_sound) {
// record sound
if(sound_samples > rec_buffer_ptr) {
int pos;
int pos2;
SDL_LockAudio();
- ssize = sound_samples * SndSpecPresented.channels;
- pos = nSndDataPos;
- pos2 = pos + ssize;
- ptr1 = &pSoundBuf[pos];
- if(pos2 >= uBufSize) {
- size1 = uBufSize - pos;
- size2 = pos2 - uBufSize;
- ptr2 = &pSoundBuf[0];
- } else {
- size1 = ssize;
- size2 = 0;
- ptr2 = NULL;
- }
- if(ptr1) {
- CopyMemory(ptr1, sound_buffer, size1);
- }
- if(ptr2) {
- CopyMemory(ptr2, sound_buffer + size1, size2);
+ if(pSndApplySem) {
+ SDL_SemWait(pSndApplySem);
+ ssize = sound_samples * SndSpecPresented.channels;
+ pos = nSndDataPos;
+ pos2 = pos + ssize;
+ ptr1 = &pSoundBuf[pos];
+ if(pos2 >= uBufSize) {
+ size1 = uBufSize - pos;
+ size2 = pos2 - uBufSize;
+ ptr2 = &pSoundBuf[0];
+ } else {
+ size1 = ssize;
+ size2 = 0;
+ ptr2 = NULL;
+ }
+ if(ptr1) {
+ CopyMemory(ptr1, sound_buffer, size1 * sizeof(Sint16));
+ }
+ if(ptr2) {
+ CopyMemory(ptr2, sound_buffer + size1, size2 * sizeof(Sint16));
+ }
+ nSndDataPos = (nSndDataPos + ssize) % uBufSize;
+ SDL_SemPost(pSndApplySem);
}
- nSndDataPos = (nSndDataPos + ssize) % uBufSize;
SDL_UnlockAudio();
}
SDL_PauseAudio(0);
if(!now_mute && sound_ok) {
// check current position
DWORD size1, size2;
+
WORD *ptr1, *ptr2;
// WIP
-
- if(ptr1) {
- ZeroMemory(ptr1, size1);
- }
- if(ptr2) {
- ZeroMemory(ptr2, size2);
+ int ssize;
+ int pos;
+ int pos2;
+ if(pSndApplySem) {
+ SDL_SemWait(pSndApplySem);
+ SDL_LockAudio();
+ ssize = sound_samples * SndSpecPresented.channels;
+ pos = nSndDataPos;
+ pos2 = pos + ssize;
+ ptr1 = &pSoundBuf[pos];
+ if(pos2 >= uBufSize) {
+ size1 = uBufSize - pos;
+ size2 = pos2 - uBufSize;
+ ptr2 = &pSoundBuf[0];
+ } else {
+ size1 = ssize;
+ size2 = 0;
+ ptr2 = NULL;
+ }
+
+ if(ptr1) {
+ ZeroMemory(ptr1, size1 * sizeof(Sint16));
+ }
+ if(ptr2) {
+ ZeroMemory(ptr2, size2 * sizeof(Sint16));
+ }
+ nSndDataPos = (nSndDataPos + ssize) % uBufSize;
+ SDL_UnlockAudio();
+ SDL_SemPost(pSndApplySem);
}
- lpdsb->Unlock(ptr1, size1, ptr2, size2);
+ SDL_PauseAudio(0);
}
now_mute = true;
}
} else {
// update wave header
struct wavheader_t header;
- header.dwRIFF = 0x46464952;
- header.dwFileSize = rec_bytes + sizeof(wavheader_t) - 8;
- header.dwWAVE = 0x45564157;
- header.dwfmt_ = 0x20746d66;
- header.dwFormatSize = 16;
- header.wFormatTag = 1;
- header.wChannels = 2;
- header.wBitsPerSample = 16;
- header.dwSamplesPerSec = sound_rate;
- header.wBlockAlign = header.wChannels * header.wBitsPerSample / 8;
- header.dwAvgBytesPerSec = header.dwSamplesPerSec * header.wBlockAlign;
- header.dwdata = 0x61746164;
- header.dwDataLength = rec_bytes;
-
+
+ header.dwRIFF = EndianToLittle_DWORD(0x46464952);
+ header.dwFileSize = EndianToLittle_DWORD(rec_bytes + sizeof(wavheader_t) - 8);
+ header.dwWAVE = EndianToLittle_DWORD(0x45564157);
+ header.dwfmt_ = EndianToLittle_DWORD(0x20746d66);
+ header.dwFormatSize = EndianToLittle_DWORD(16);
+ header.wFormatTag = EndianToLittle_WORD(1);
+ header.wChannels = EndianToLittle_WORD(2);
+ header.wBitsPerSample = EndianToLittle_WORD(16);
+ header.dwSamplesPerSec = EndianToLittle_DWORD(sound_rate);
+ header.wBlockAlign = EndianToLittle_WORD(header.wChannels * header.wBitsPerSample / 8);
+ header.dwAvgBytesPerSec = EndianToLittle_DWORD(header.dwSamplesPerSec * header.wBlockAlign);
+ header.dwdata = EndianToLittle_DWORD(0x61746164);
+ header.dwDataLength = EndianToLittle_DWORD(rec_bytes);
rec->Fseek(0, FILEIO_SEEK_SET);
rec->Fwrite(&header, sizeof(wavheader_t), 1);
rec->Fclose();
--- /dev/null
+cmake_minimum_required (VERSION 2.6)
+
+message("* sdl")
+
+# Sources of the xm7 executable built from this directory.
+add_executable(xm7
+  api_draw.cpp
+  api_snd2.cpp api_wavwriter.cpp
+  snd_buffer.cpp
+  SndDrvTmpl.cpp SndDrvOpn.cpp SndDrvWav.cpp SndDrvBeep.cpp SndDrvCMT.cpp
+  api_kbd.cpp api_js.cpp api_mouse.cpp
+  SDLKbdInterface.cpp
+  SDLJoyInterface.cpp
+  KbdInterface.cpp
+  draw_thread.cpp
+  sdl_cpuid.c
+  sdl_inifile.c
+  sdl_file.c
+  windows_main.cpp
+  )
+
+# Libraries are supplied through variables set by the parent scripts.
+target_link_libraries(xm7 ${LOCAL_LIBS}
+  ${AGAR_LIBS}
+  ${OPENGL_LIBRARY}
+  ${OPENCL_LIBRARY}
+  ${GETTEXT_LIBRARY}
+  ${OPENMP_LIBRARY}
+  ${SDL_LIBRARY}
+  ${THREADS_LIBRARY}
+  fontconfig
+  freetype
+  ${AGAR_DEPLIBS}
+)
+
+# Link librt only when the configure step set LIB_RT_HAS_NANOSLEEP
+# (presumably because nanosleep() was found there).
+if(LIB_RT_HAS_NANOSLEEP)
+  target_link_libraries(xm7 rt)
+endif(LIB_RT_HAS_NANOSLEEP)
+
+install(TARGETS xm7 DESTINATION bin)
\ No newline at end of file
--- /dev/null
+message("* sdl/vram/generic")
+
+# Library holding the generic VRAM conversion routines, one source per line.
+add_library(xm7_vram-generic
+  api_vram256k.c
+  api_vram4096.c
+  api_vram8.c
+  api_vramvec.c
+)
\ No newline at end of file
--- /dev/null
+/*\r
+ * api_vram256k.cpp\r
+ * Convert VRAM -> VirtualVram\r
+ * (C) 2011 K.Ohta <whatisthis.sowhat@gmail.com>\r
+ */\r
+\r
+\r
+#include "api_draw.h"\r
+//#include "api_scaler.h"\r
+#include "api_vram.h"\r
+\r
+\r
+/* Copy eight 32-bit pixels from cx to disp. */
+static void putword(Uint32 *disp, Uint32 *cx)
+{
+    int k;
+
+    for(k = 0; k < 8; k++) {
+        disp[k] = cx[k];
+    }
+}
+\r
+\r
+\r
+/*
+ * Gather the six plane bytes read for the G component at 'addr' and hand
+ * them to lshift_6bit8v().
+ *
+ * Returns an all-zero vector when bit 0x40 of mpage is set. All planes are
+ * addressed relative to vram_pb.
+ */
+static v8hi_t gpixel2cbuf(Uint32 addr, Uint32 mpage)
+{
+    Uint8 *base = vram_pb;
+    v8hi_t planes;
+
+    if(mpage & 0x40) {
+        v8hi_t blank;
+        blank.v = (v8si){0, 0, 0, 0, 0, 0, 0, 0};
+        return blank;
+    }
+
+    planes.i[0] = planes.i[1] = planes.i[2] = planes.i[3] = 0;
+    planes.b[0] = base[addr + 0x2a000];
+    planes.b[1] = base[addr + 0x28000];
+    planes.b[2] = base[addr + 0x16000];
+    planes.b[3] = base[addr + 0x14000];
+    planes.b[4] = base[addr + 0x12000];
+    planes.b[5] = base[addr + 0x10000];
+    return lshift_6bit8v(&planes);
+}
+\r
+/*
+ * Gather the six plane bytes read for the R component at 'addr' and hand
+ * them to lshift_6bit8v().
+ *
+ * Returns an all-zero vector when bit 0x20 of mpage is set. All planes are
+ * addressed relative to vram_pb.
+ */
+static v8hi_t rpixel2cbuf(Uint32 addr, Uint32 mpage)
+{
+    Uint8 *base = vram_pb;
+    v8hi_t planes;
+
+    if(mpage & 0x20) {
+        v8hi_t blank;
+        blank.v = (v8si){0, 0, 0, 0, 0, 0, 0, 0};
+        return blank;
+    }
+
+    planes.i[0] = planes.i[1] = planes.i[2] = planes.i[3] = 0;
+    planes.b[0] = base[addr + 0x22000];
+    planes.b[1] = base[addr + 0x20000];
+    planes.b[2] = base[addr + 0x0e000];
+    planes.b[3] = base[addr + 0x0c000];
+    planes.b[4] = base[addr + 0x0a000];
+    planes.b[5] = base[addr + 0x08000];
+    return lshift_6bit8v(&planes);
+}
+\r
+/*
+ * Gather the six plane bytes read for the B component at 'addr' and hand
+ * them to lshift_6bit8v().
+ *
+ * Returns an all-zero vector when bit 0x10 of mpage is set. All planes are
+ * addressed relative to vram_pb.
+ */
+static v8hi_t bpixel2cbuf(Uint32 addr, Uint32 mpage)
+{
+    Uint8 *base = vram_pb;
+    v8hi_t planes;
+
+    if(mpage & 0x10) {
+        v8hi_t blank;
+        blank.v = (v8si){0, 0, 0, 0, 0, 0, 0, 0};
+        return blank;
+    }
+
+    planes.i[0] = planes.i[1] = planes.i[2] = planes.i[3] = 0;
+    planes.b[0] = base[addr + 0x1a000];
+    planes.b[1] = base[addr + 0x18000];
+    planes.b[2] = base[addr + 0x06000];
+    planes.b[3] = base[addr + 0x04000];
+    planes.b[4] = base[addr + 0x02000];
+    planes.b[5] = base[addr + 0x00000];
+    return lshift_6bit8v(&planes);
+}
+\r
+\r
+\r
+\r
+/*
+ * Build eight packed 32-bit pixels for VRAM address 'addr'.
+ *
+ * addr  : VRAM offset passed to the per-component readers
+ * mpage : plane mask; a masked component contributes zero
+ * cbuf  : receives 8 pixels
+ *
+ * Little-endian builds pack each pixel as B<<16 | G<<8 | R with 0xff in
+ * the top byte. Other builds pack R<<24 | G<<16 | B<<8 with 0xff in the
+ * low byte, so cbuf is always written.
+ * NOTE(review): the non-little-endian layout follows the one used by
+ * CalcPalette_8colors - confirm against the renderer.
+ */
+static void getvram_256k(Uint32 addr, Uint32 mpage, Uint32 *cbuf)
+{
+    v8hi_t r, g, b;
+    int i;
+
+    b = bpixel2cbuf(addr, mpage);
+    r = rpixel2cbuf(addr, mpage);
+    g = gpixel2cbuf(addr, mpage);
+    for(i = 0; i < 8; i++) {
+#ifdef AG_LITTLE_ENDIAN
+        cbuf[i] = (b.i[i] << 16) | (g.i[i] << 8) | r.i[i] | 0xff000000;
+#else
+        cbuf[i] = ((Uint32)r.i[i] << 24) | ((Uint32)g.i[i] << 16) |
+                  ((Uint32)b.i[i] << 8) | 0x000000ff;
+#endif
+    }
+}
+\r
+\r
+\r
+/*
+ * Build one 8x8 piece from VRAM. Note that the VRAM lock is NOT taken
+ * here.
+ *
+ * p     : destination of the piece's top-left pixel
+ * x, y  : piece position; the VRAM address is y * 40 + x
+ * pitch : destination row stride in Uint32 pixels
+ * mpage : plane mask forwarded to getvram_256k
+ */
+void CreateVirtualVram256k_1Pcs(Uint32 *p, int x, int y, int pitch, int mpage)
+{
+    Uint32 c[8];
+    Uint32 addr = y * 40 + x;
+    int row;
+
+    /* One VRAM byte column (8 pixels) per row, 40 bytes between rows. */
+    for(row = 0; row < 8; row++) {
+        getvram_256k(addr + row * 40, mpage, c);
+        putword(p + row * pitch, c);
+    }
+}
+\r
+/*
+ * Convert the window [xbegin, xend) x [ybegin, yend) of VRAM into pixels.
+ *
+ * p              : destination buffer; each VRAM address maps to 8
+ *                  consecutive Uint32 pixels at byte offset 32 * address
+ * ybegin, yend   : first row and one-past-last row
+ * xbegin, xend   : first byte column and one-past-last byte column
+ *                  (40 columns per row)
+ * mpage          : plane mask forwarded to getvram_256k
+ *
+ * Exactly one VRAM address is converted per column step, so nothing
+ * outside the requested window is read or written.
+ */
+void CreateVirtualVram256k_WindowedLine(Uint32 *p, int ybegin, int yend, int xbegin, int xend, int mpage)
+{
+    Uint32 c[8];
+    Uint8 *disp;
+    Uint32 addr;
+    int pitch = sizeof(Uint32) * 8;   /* bytes per converted VRAM address */
+    int xx;
+    int yy;
+
+    for(yy = ybegin; yy < yend; yy++) {
+        addr = yy * 40 + xbegin;
+        disp = (Uint8 *)p + (pitch * addr);
+        for(xx = xbegin; xx < xend; xx++) {
+            getvram_256k(addr, mpage, c);
+            putword((Uint32 *)disp, c);
+            addr++;
+            disp += pitch;
+        }
+    }
+}
+\r
+/* Convert rows [ybegin, yend) across the full width (byte columns 0..39). */
+void CreateVirtualVram256k_Line(Uint32 *p, int ybegin, int yend, int mpage)
+{
+    const int first_col = 0;
+    const int end_col = 40;
+
+    CreateVirtualVram256k_WindowedLine(p, ybegin, yend, first_col, end_col, mpage);
+}
+\r
+/* Entry points of the generic 256k-colour converter, listed in the order\r
+ * 1Pcs, Line, WindowedLine. */\r
+Api_Vram_FuncList api_vram256k_generic = {\r
+   CreateVirtualVram256k_1Pcs,\r
+   CreateVirtualVram256k_Line,\r
+   CreateVirtualVram256k_WindowedLine\r
+};\r
--- /dev/null
+/*\r
+ * api_vram4096.cpp\r
+ * Convert VRAM -> VirtualVram\r
+ * (C) 2011 K.Ohta <whatisthis.sowhat@gmail.com>\r
+ */\r
+\r
+\r
+#include "api_draw.h"\r
+#include "api_vram.h"\r
+#include "sdl_cpuid.h"\r
+#include "cache_wrapper.h"\r
+\r
+Uint8 *vram_pb;\r
+Uint8 *vram_pr;\r
+Uint8 *vram_pg;\r
+\r
+extern struct XM7_CPUID *pCpuID;\r
+\r
+/*
+ * Store one entry of the 4096-colour palette (rgbAnalogGDI).
+ *
+ * index   : palette slot; only the low 12 bits are used
+ * r, g, b : colour components; only the upper 4 bits of each are kept
+ * a       : alpha, used by the first packing branch only (the other
+ *           branch stores 255)
+ *
+ * NOTE(review): SDL's SDL_endian.h defines SDL_LIL_ENDIAN on every
+ * platform, so if that header is in scope the #ifdef below is always
+ * true - confirm whether SDL_BYTEORDER should be compared instead.
+ */
+void CalcPalette_4096Colors(Uint32 index, Uint8 r, Uint8 g, Uint8 b, Uint8 a)
+{
+    Uint32 ds;
+    Uint32 *pal = rgbAnalogGDI;
+
+    r = r & 0xf0;
+    g = g & 0xf0;
+    b = b & 0xf0;
+
+    index &= 0x0fff;
+#ifdef SDL_LIL_ENDIAN
+    ds = (Uint32)r | ((Uint32)g << 8) | ((Uint32)b << 16) | ((Uint32)a << 24);
+#else
+    /* Parenthesised: '+' binds tighter than '<<', so the unparenthesised
+     * form would shift by sums instead of combining the bytes. */
+    ds = ((Uint32)r << 24) | ((Uint32)g << 16) | ((Uint32)b << 8) | 255;
+#endif
+    // Prefetch to cache when writing, not temporally.
+    _prefetch_data_write_permanent(&pal[index], sizeof(Uint32));
+    pal[index] = ds;
+}
+\r
+/*
+ * Look up eight colour indices in rgbAnalogGDI and store the resulting
+ * eight pixels at disp with a single vector assignment.
+ */
+static inline void putword2_vec(Uint32 *disp, volatile v8hi_t cbuf)
+{
+    v8hi_t *out = (v8hi_t *)disp;
+    v8hi_t pixels;
+    int k;
+
+    for(k = 0; k < 8; k++) {
+        pixels.i[k] = rgbAnalogGDI[cbuf.i[k]];
+    }
+    out->v = pixels.v;
+}
+\r
+/*
+ * Combine the twelve plane bytes at 'addr' (four each from vram_pg,
+ * vram_pr and vram_pb) into eight colour indices.
+ *
+ * Each plane byte selects a precomputed vector from aPlanes; the vectors
+ * are OR-ed together into cbuf->v. The caller must ensure aPlanes is
+ * non-NULL.
+ */
+static inline void getvram_4096_vec(Uint32 addr, v8hi_t *cbuf)
+{
+    /* Green planes */
+    cbuf->v = aPlanes[G0 + vram_pg[addr + 0x06000]] |
+              aPlanes[G1 + vram_pg[addr + 0x04000]] |
+              aPlanes[G2 + vram_pg[addr + 0x02000]] |
+              aPlanes[G3 + vram_pg[addr + 0x00000]];
+
+    /* Red planes */
+    cbuf->v |= aPlanes[R0 + vram_pr[addr + 0x06000]] |
+               aPlanes[R1 + vram_pr[addr + 0x04000]] |
+               aPlanes[R2 + vram_pr[addr + 0x02000]] |
+               aPlanes[R3 + vram_pr[addr + 0x00000]];
+
+    /* Blue planes */
+    cbuf->v |= aPlanes[B0 + vram_pb[addr + 0x06000]] |
+               aPlanes[B1 + vram_pb[addr + 0x04000]] |
+               aPlanes[B2 + vram_pb[addr + 0x02000]] |
+               aPlanes[B3 + vram_pb[addr + 0x00000]];
+}
+\r
+/*
+ * Build one 8x8 piece from VRAM. Note that the VRAM lock is NOT taken
+ * here.
+ *
+ * p     : destination of the piece's top-left pixel
+ * x, y  : piece position; the VRAM address is y * 40 + x
+ * pitch : destination row stride in Uint32 pixels
+ * mode  : unused
+ *
+ * When aPlanes is NULL every row is drawn with colour index 0.
+ */
+void CreateVirtualVram4096_1Pcs(Uint32 *p, int x, int y, int pitch, int mode)
+{
+    v8hi_t c;
+    Uint32 addr = y * 40 + x;
+    int row;
+
+    if(aPlanes == NULL) {
+        c.v = (v8si){0,0,0,0,0,0,0,0};
+        for(row = 0; row < 8; row++) {
+            putword2_vec(p + row * pitch, c);
+        }
+    } else {
+        /* 40 VRAM bytes separate vertically adjacent rows. */
+        for(row = 0; row < 8; row++) {
+            getvram_4096_vec(addr + row * 40, &c);
+            putword2_vec(p + row * pitch, c);
+        }
+    }
+}
+\r
+/*\r
+ * 1LineのピースをVRAMから作成する:VramLockしない事に注意\r
+ */\r
+void CreateVirtualVram4096_Line(Uint32 *p, int ybegin, int yend, int mode)\r
+{\r
+// Uint32 c[8];\r
+ v8hi_t c;\r
+ Uint8 *disp;\r
+ Uint32 addr;\r
+ int yy;\r
+ int xx;\r
+ int pitch;\r
+\r
+ pitch = sizeof(Uint32) * 8;\r
+ // Loop廃止(高速化)\r
+ if(aPlanes == NULL) {\r
+ c.v = (v8si){0,0,0,0,0,0,0,0};\r
+ for(yy = ybegin; yy < yend; yy++) {\r
+ addr = yy * 40;\r
+ disp = (Uint8 *)p + (pitch * addr);\r
+ for(xx = 0; xx < 5; xx++) {\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ }\r
+ }\r
+ } else {\r
+ for(yy = ybegin; yy < yend; yy++) {\r
+ addr = yy * 40;\r
+ disp = (Uint8 *)p + (pitch * addr);\r
+ for(xx = 0; xx < 5; xx++) {\r
+ getvram_4096_vec(addr, &c);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ getvram_4096_vec(addr, &c);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ getvram_4096_vec(addr, &c);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ getvram_4096_vec(addr, &c);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ getvram_4096_vec(addr, &c);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ getvram_4096_vec(addr, &c);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ getvram_4096_vec(addr, &c);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ getvram_4096_vec(addr, &c);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ }\r
+ }\r
+ } \r
+}\r
+\r
+/*
+ * Convert rows ybegin..yend (exclusive) within byte columns
+ * xbegin..xend (exclusive).
+ *
+ * p    : destination buffer, 320 Uint32 pixels per row; does nothing if
+ *        NULL
+ * mode : unused
+ *
+ * Each byte column covers 8 pixels, so column xbegin starts at pixel
+ * xbegin * 8 of its row - the same mapping CreateVirtualVram4096_Line
+ * uses. When aPlanes is NULL the window is drawn with colour index 0.
+ *
+ * The vector helpers are used unconditionally, as in the other 4096
+ * colour routines of this file.
+ */
+void CreateVirtualVram4096_WindowedLine(Uint32 *p, int ybegin, int yend, int xbegin, int xend, int mode)
+{
+    v8hi_t c;
+    Uint32 *disp;
+    Uint32 addr;
+    int xx;
+    int yy;
+
+    if(p == NULL) return;
+
+    if(aPlanes == NULL) {
+        c.v = (v8si){0,0,0,0,0,0,0,0};
+        for(yy = ybegin; yy < yend; yy++) {
+            disp = &p[yy * 320 + xbegin * 8];
+            for(xx = xbegin; xx < xend; xx++) {
+                putword2_vec(disp, c);
+                disp += 8;
+            }
+        }
+        return;
+    }
+
+    for(yy = ybegin; yy < yend; yy++) {
+        addr = yy * 40 + xbegin;
+        disp = &p[yy * 320 + xbegin * 8];
+        for(xx = xbegin; xx < xend; xx++) {
+            getvram_4096_vec(addr, &c);
+            putword2_vec(disp, c);
+            addr++;
+            disp += 8;
+        }
+    }
+}
+\r
+/* Entry points of the generic 4096-colour converter, listed in the order\r
+ * 1Pcs, Line, WindowedLine. */\r
+Api_Vram_FuncList api_vram4096_generic = {\r
+   CreateVirtualVram4096_1Pcs,\r
+   CreateVirtualVram4096_Line,\r
+   CreateVirtualVram4096_WindowedLine\r
+};\r
--- /dev/null
+/*\r
+ * api_vram8.cpp\r
+ * Convert VRAM -> VirtualVram\r
+ * (C) 2011 K.Ohta <whatisthis.sowhat@gmail.com>\r
+ */\r
+\r
+\r
+#include "api_draw.h"\r
+#include "api_vram.h"\r
+#include "sdl_cpuid.h"\r
+#include "cache_wrapper.h"\r
+\r
+extern struct XM7_CPUID *pCpuID;\r
+\r
+\r
+/*
+ * Point the three plane pointers into the VRAM block at p:
+ * blue at +0, red at +0x8000, green at +0x10000.
+ */
+void SetVram_200l(Uint8 *p)
+{
+    Uint8 *const base = p;
+
+    vram_pr = base + 0x8000;
+    vram_pg = base + 0x10000;
+    vram_pb = base;
+}
+\r
+/*
+ * Point the three plane pointers into the VRAM block at p:
+ * blue at +0, red at +0x8000, green at +0x10000.
+ * NOTE(review): the offsets are identical to SetVram_200l - confirm this
+ * is intended for the 400-line layout.
+ */
+void SetVram_400l(Uint8 *p)
+{
+    Uint8 *const base = p;
+
+    vram_pr = base + 0x8000;
+    vram_pg = base + 0x10000;
+    vram_pb = base;
+}
+\r
+\r
+/*
+ * Store one entry of the 8-colour palette (rgbTTLGDI).
+ *
+ * index   : palette slot; not range-checked here
+ * r, g, b : colour components
+ * a       : not used; the stored alpha byte is always 0xff
+ */
+void CalcPalette_8colors(Uint32 index, Uint8 r, Uint8 g, Uint8 b, Uint8 a)
+{
+    Uint32 ds;
+
+#ifdef AG_LITTLE_ENDIAN
+    ds = (Uint32)r | ((Uint32)g << 8) | ((Uint32)b << 16) | 0xff000000;
+#else
+    /* Parenthesised: '+' binds tighter than '<<', so the unparenthesised
+     * form would shift by sums instead of combining the bytes. */
+    ds = ((Uint32)r << 24) | ((Uint32)g << 16) | ((Uint32)b << 8) | 0x000000ff;
+#endif
+    _prefetch_data_write_permanent(rgbTTLGDI, sizeof(Uint32) * 8);
+    rgbTTLGDI[index] = ds;
+}
+\r
+#if (__GNUC__ >= 4)\r
+/*
+ * Combine the three plane bytes at 'addr' into eight colour indices.
+ *
+ * The blue byte selects from the B0 table, red from B1 and green from B2;
+ * the selected aPlanes vectors are OR-ed into cbuf->v. The caller must
+ * ensure aPlanes is non-NULL.
+ */
+static void getvram_8_vec(Uint32 addr, v8hi_t *cbuf)
+{
+    const uint8_t green = vram_pg[addr];
+    const uint8_t red = vram_pr[addr];
+    const uint8_t blue = vram_pb[addr];
+
+    cbuf->v = aPlanes[B2 + green] |
+              aPlanes[B1 + red] |
+              aPlanes[B0 + blue];
+}
+\r
+/*
+ * Look up eight colour indices in pal (low 3 bits of each index) and
+ * store the resulting eight pixels at disp with one vector assignment.
+ */
+static inline void putword8_vec(Uint32 *disp, volatile v8hi_t c, Uint32 *pal)
+{
+    v8hi_t *out = (v8hi_t *)disp;
+    v8hi_t pixels;
+    int k;
+
+    for(k = 0; k < 8; k++) {
+        pixels.i[k] = pal[c.i[k] & 7];
+    }
+    out->v = pixels.v;
+}
+\r
+#else\r
+/*
+ * Convert one byte from each colour plane into eight 3-bit colour indices.
+ *
+ * c       : receives 8 indices; c[0] comes from bit 7 of the plane bytes,
+ *           c[7] from bit 0
+ * r, g, b : plane bytes
+ *
+ * Each index is (blue bit) | (red bit << 1) | (green bit << 2), the same
+ * bit assignment getvram_8_vec uses (B0 = blue, B1 = red, B2 = green).
+ * The components are combined with bitwise OR; a logical OR would collapse
+ * the index to 0 or 1.
+ */
+static inline void planeto8(Uint32 *c, uint8_t r, uint8_t g, uint8_t b)
+{
+    int i;
+
+    for(i = 0; i < 8; i++) {
+        const int bit = 7 - i;
+
+        c[i] = ((b >> bit) & 1) |
+               (((r >> bit) & 1) << 1) |
+               (((g >> bit) & 1) << 2);
+    }
+}
+\r
+/*
+ * Read the red, green and blue plane bytes at 'addr' and expand them into
+ * eight colour indices in cbuf via planeto8().
+ */
+static void getvram_8(Uint32 addr, Uint32 *cbuf)
+{
+    const uint8_t green = vram_pg[addr];
+    const uint8_t red = vram_pr[addr];
+    const uint8_t blue = vram_pb[addr];
+
+    planeto8(cbuf, red, green, blue);
+}
+\r
+/*
+ * Look up eight colour indices in pal (low 3 bits of each index) and
+ * store the resulting eight pixels at disp.
+ */
+static inline void putword8(Uint32 *disp, Uint32 *c, Uint32 *pal)
+{
+    int k;
+
+    for(k = 0; k < 8; k++) {
+        disp[k] = pal[c[k] & 7];
+    }
+}
+\r
+#endif // __GNUC__ >= 4\r
+\r
+\r
+\r
+/*\r
+ * 8x8のピースをVRAMから作成する:VramLockしない事に注意\r
+ */\r
+void CreateVirtualVram8_1Pcs(Uint32 *p, int x, int y, int pitch, int mode)\r
+{\r
+#if (__GNUC__ >= 4) \r
+ v8hi_t c;\r
+ Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+ Uint32 *disp = p;\r
+ Uint32 addr;\r
+\r
+ if((p == NULL) || (pal == NULL)) return;\r
+ addr = y * 80 + x;\r
+\r
+ // Loop廃止(高速化)\r
+ if(aPlanes == NULL) {\r
+ c.v = (v8si){0,0,0,0,0,0,0,0};\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+// disp += pitch;\r
+ return;\r
+ } else {\r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ getvram_8_vec(addr , &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ getvram_8_vec(addr , &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+// addr += 80;\r
+// disp += pitch;\r
+ }\r
+#else \r
+ Uint32 c[8];\r
+ Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+ Uint8 *disp =(Uint8 *) p;\r
+\r
+ if((p == NULL) || (pal == NULL)) return;\r
+ addr = y * 80 + x;\r
+\r
+ // Loop廃止(高速化)\r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr , c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr , c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ // addr += 80;\r
+ // disp += pitch;\r
+ \r
+#endif \r
+}\r
+\r
+/*\r
+ * ybegin - yendの行を変換する\r
+ */\r
+void CreateVirtualVram8_Line(Uint32 *p, int ybegin, int yend, int mode)\r
+{\r
+ v8hi_t c;\r
+ Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+ Uint8 *disp =(Uint8 *) p;\r
+ Uint32 addr;\r
+ int pitch;\r
+ int xx;\r
+ int yy = ybegin;\r
+ \r
+ if((p == NULL) || (pal == NULL)) return;\r
+ pitch = sizeof(Uint32) * 8;\r
+\r
+ // Loop廃止(高速化)\r
+ if(aPlanes == NULL) {\r
+ c.v = (v8si){0,0,0,0,0,0,0,0};\r
+// for(yy = ybegin; yy < yend; yy++) { \r
+ addr = yy * 80;\r
+// disp = (Uint8 *)(&p[yy * 640]);\r
+ for(xx = 0; xx < (80 / 8); xx ++) { \r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ }\r
+// }\r
+ return;\r
+ } else {\r
+// for(yy = ybegin; yy < yend; yy++) { \r
+ addr = yy * 80;\r
+// disp = (Uint8 *)(&p[yy * 640]);\r
+ for(xx = 0; xx < (80 / 8); xx++) { \r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp += pitch;\r
+ \r
+ getvram_8_vec(addr , &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp += pitch;\r
+\r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp += pitch;\r
+ \r
+ getvram_8_vec(addr , &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp += pitch;\r
+ \r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp += pitch;\r
+ \r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp += pitch;\r
+ \r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp += pitch;\r
+ \r
+ getvram_8_vec(addr, &c);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp += pitch;\r
+ }\r
+ \r
+// }\r
+ return;\r
+ }\r
+}\r
+\r
+/*\r
+ * Convert the byte columns [xbegin, xend) of one raster of 8-colour VRAM.\r
+ *\r
+ * p      : destination of the raster; column xx lands at p[xx * 8].\r
+ * ybegin : raster index; the VRAM source offset is ybegin * 80 + xbegin.\r
+ * xbegin, xend : column range, each reduced modulo 80 before use.\r
+ * yend, mode   : not read by this routine.\r
+ *\r
+ * NOTE(review): because of the modulo, xend == 80 becomes 0 and nothing is\r
+ * drawn; the original also computed ybegin % 400 after the raster index had\r
+ * already been captured, so the raster index is used unwrapped. Both\r
+ * behaviours are kept as they were -- confirm against the callers.\r
+ *\r
+ * The non-GCC fallback previously referenced undeclared variables (addr,\r
+ * pitch, y) and addressed the destination differently from the vector path;\r
+ * it now performs the same single-raster conversion with the scalar helpers.\r
+ */\r
+void CreateVirtualVram8_WindowedLine(Uint32 *p, int ybegin, int yend, int xbegin, int xend, int mode)\r
+{\r
+    Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+    Uint8 *disp;\r
+    Uint32 addr;\r
+    const int pitch = sizeof(Uint32) * 8; /* bytes written per VRAM byte column */\r
+    const int yy = ybegin;\r
+    int xx;\r
+#if (__GNUC__ >= 4)\r
+    v8hi_t c;\r
+#else\r
+    Uint32 c[8];\r
+#endif\r
+\r
+    if((p == NULL) || (pal == NULL)) return;\r
+    xbegin = xbegin % 80;\r
+    xend = xend % 80;\r
+\r
+    addr = yy * 80 + xbegin;\r
+    disp = (Uint8 *)(&p[xbegin * 8]);\r
+\r
+#if (__GNUC__ >= 4)\r
+    if(aPlanes == NULL) {\r
+        /* No conversion table: emit colour index 0 for the whole window. */\r
+        c.v = (v8si){0,0,0,0,0,0,0,0};\r
+        for(xx = xbegin; xx < xend; xx++) {\r
+            putword8_vec((Uint32 *)disp, c, pal);\r
+            disp += pitch;\r
+        }\r
+        return;\r
+    }\r
+    for(xx = xbegin; xx < xend; xx++) {\r
+        getvram_8_vec(addr, &c);\r
+        putword8_vec((Uint32 *)disp, c, pal);\r
+        addr++;\r
+        disp += pitch;\r
+    }\r
+#else\r
+    for(xx = xbegin; xx < xend; xx++) {\r
+        getvram_8(addr, c);\r
+        putword8((Uint32 *)disp, c, pal);\r
+        addr++;\r
+        disp += pitch;\r
+    }\r
+#endif\r
+}\r
+\r
+/* Function table for the generic 8-colour converters: 8x8 piece, line, windowed line. */\r
+Api_Vram_FuncList api_vram8_generic = {\r
+ CreateVirtualVram8_1Pcs,\r
+ CreateVirtualVram8_Line,\r
+ CreateVirtualVram8_WindowedLine\r
+};\r
--- /dev/null
+/*\r
+ * api_vramvec.cpp\r
+ * Convert VRAM -> VirtualVram(Vector Version)\r
+ * (C) 2012 K.Ohta <whatisthis.sowhat@gmail.com>\r
+ */\r
+\r
+\r
+#include "xm7_types.h"\r
+#include "api_draw.h"\r
+#include "api_vram.h"\r
+#include "agar_logger.h"\r
+#include "cache_wrapper.h"\r
+\r
+/*\r
+* Definition of Conversion Tables.\r
+*/\r
+// Reduce Tables 20120131\r
+\r
+v8si *aPlanes;\r
+/*\r
+ * Spread the 8 bits of x over the 8 lanes of *p, most significant bit\r
+ * first: lane k receives bit (7 - k) of x as 0 or 1.\r
+ */\r
+static void initvramtblsub_vec(volatile unsigned char x, volatile v8hi_t *p)\r
+{\r
+    int lane;\r
+\r
+    for(lane = 0; lane < 8; lane++) {\r
+        p->i[lane] = (x >> (7 - lane)) & 0x01;\r
+    }\r
+}\r
+\r
+/* Intentionally empty: this routine builds no table. */\r
+void initvramtbl_8_vec(void)\r
+{\r
+}\r
+\r
+/*\r
+ * Allocate an uninitialised table of `size` v8si entries, aligned to\r
+ * 16 * sizeof(Uint32) bytes. Returns NULL when the allocation fails.\r
+ * The block must be released with the matching platform routine\r
+ * (free, or __mingw_aligned_free on _WINDOWS builds).\r
+ */\r
+static v8si *initvramtblsub(int size)\r
+{\r
+#ifndef _WINDOWS\r
+    void *mem = NULL;\r
+\r
+    if(posix_memalign(&mem, 16 * sizeof(Uint32), sizeof(v8si) * size) != 0) return NULL;\r
+    return (v8si *)mem;\r
+#else\r
+    return (v8si *)__mingw_aligned_malloc(sizeof(v8si) * size, 16 * sizeof(Uint32));\r
+#endif\r
+}\r
+\r
+\r
+/*\r
+ * Build the bit-plane expansion table (aPlanes).\r
+ *\r
+ * For each byte value 0..255 the 8 bits are expanded to 8 lanes (MSB in\r
+ * lane 0) and stored twelve times, each copy shifted one bit further left:\r
+ * B0..B3 hold shifts 0..3, R0..R3 shifts 4..7, G0..G3 shifts 8..11.\r
+ * On allocation failure aPlanes stays NULL and nothing else is done.\r
+ */\r
+void initvramtbl_4096_vec(void)\r
+{\r
+    /* Plane bases in shift order; entry k receives the pattern shifted by k. */\r
+    const int plane_base[12] = { B0, B1, B2, B3, R0, R1, R2, R3, G0, G1, G2, G3 };\r
+    volatile v8hi_t bits;\r
+    int value;\r
+    int k;\r
+\r
+    aPlanes = initvramtblsub(12 * 256);\r
+    if(aPlanes == NULL) return;\r
+    XM7_DebugLog(XM7_LOG_DEBUG, "Vram Table OK");\r
+\r
+    for(value = 0; value <= 255; value++) {\r
+        initvramtblsub_vec(value & 255, &bits);\r
+        for(k = 0; k < 12; k++) {\r
+            if(k > 0) bits.v <<= 1;\r
+            aPlanes[plane_base[k] + value] = bits.v;\r
+        }\r
+    }\r
+    /* Hint the cache about the whole table (12 planes * 256 entries * 8 lanes). */\r
+    _prefetch_data_read_permanent(aPlanes, sizeof(Uint32) * 256 * 8 * 12); // 98KB (!), priority = 1.\r
+}\r
+\r
+/* Intentionally empty: counterpart of initvramtbl_8_vec, which allocates nothing. */\r
+void detachvramtbl_8_vec(void)\r
+{\r
+ \r
+}\r
+\r
+/*\r
+ * Release the bit-plane table and reset aPlanes to NULL.\r
+ * Does nothing when no table is allocated.\r
+ */\r
+void detachvramtbl_4096_vec(void)\r
+{\r
+    if(aPlanes == NULL) return;\r
+\r
+#ifdef _WINDOWS\r
+    __mingw_aligned_free(aPlanes);\r
+#else\r
+    free(aPlanes);\r
+#endif\r
+    aPlanes = NULL;\r
+}\r
+\r
+\r
+\r
+/*\r
+ * Combine six bit-plane bytes (v->b[0]..v->b[5]) into eight lane values.\r
+ *\r
+ * The planes are looked up through B2, B3, R0, R1, R2, R3 of aPlanes, so\r
+ * b[0] lands in bit 2 and b[5] in bit 7 of every lane. Any lane that has\r
+ * one of bits 3..7 set additionally gets its two low bits set (| 0x03).\r
+ *\r
+ * aPlanes must be non-NULL; it is dereferenced unconditionally.\r
+ *\r
+ * Fixes: the version test used the non-existent macro __GCC_MINOR__\r
+ * (always 0 in #if), so GCC 4.7-4.9 never took the vector path; the scalar\r
+ * fallback tested mask.i[k] but wrote cbuf.s[k], which is now cbuf.i[k] so\r
+ * that the tested lane and the modified lane are the same.\r
+ */\r
+v8hi_t lshift_6bit8v(v8hi_t *v)\r
+{\r
+    v8hi_t cbuf;\r
+    v8hi_t mask;\r
+\r
+    cbuf.v =\r
+        aPlanes[B2 + v->b[0]] |\r
+        aPlanes[B3 + v->b[1]] |\r
+        aPlanes[R0 + v->b[2]] |\r
+        aPlanes[R1 + v->b[3]] |\r
+        aPlanes[R2 + v->b[4]] |\r
+        aPlanes[R3 + v->b[5]];\r
+\r
+    /* Keep only bits 3..7 to decide which lanes get their low bits filled. */\r
+    mask.v = (v8si){0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8};\r
+    mask.v = mask.v & cbuf.v;\r
+#if ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7)) || (__GNUC__ > 4) //GCC 4.7 or later.\r
+    {\r
+        /* Vector compare yields all-ones in every lane that is non-zero. */\r
+        v8hi_t r;\r
+        r.v = mask.v != (v8si){0, 0, 0, 0, 0, 0, 0, 0};\r
+        r.v = r.v & (v8si) {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03};\r
+        cbuf.v = cbuf.v | r.v;\r
+    }\r
+#else\r
+    {\r
+        int j;\r
+        for(j = 0; j < 8; j++) {\r
+            if(mask.i[j] != 0) cbuf.i[j] |= 0x03;\r
+        }\r
+    }\r
+#endif\r
+    return cbuf;\r
+}\r
+\r
+\r
+\r
+\r
--- /dev/null
# Build the SSE2 variants of the VRAM conversion routines as library xm7_vram-sse2.
message("* sdl/vram/sse2")

#set(CMAKE_BUILD_SETTING_C_FLAGS "${CMAKE_C_FLAGS} -msse2 -msse -mmmx")
# Enable MMX/SSE/SSE2 code generation for the targets defined below.
add_compile_options(-msse2 -msse -mmmx)
add_library(xm7_vram-sse2 api_vram256k.c
 api_vram4096.c
 api_vram8.c
 api_vramvec.c
)
--- /dev/null
+/*\r
+ * api_vram256k.cpp\r
+ * Convert VRAM -> VirtualVram\r
+ * (C) 2011 K.Ohta <whatisthis.sowhat@gmail.com>\r
+ */\r
+\r
+#include "api_draw.h"\r
+//#include "api_scaler.h"\r
+#include "api_vram.h"\r
+#include "cache_wrapper.h"\r
+\r
+extern v8hi_t lshift_6bit8v_SSE2(v8hi_t v);\r
+\r
+/* Store the 8 converted pixels in cx at disp, after a write-prefetch hint. */\r
+static inline void putword(Uint32 *disp, v8hi_t cx)\r
+{\r
+    _prefetch_data_write_l1(disp, sizeof(Uint32) * 8);\r
+    *(v8hi_t *)disp = cx;\r
+}\r
+\r
+\r
+\r
+/*\r
+ * Fetch the six green bit-plane bytes for VRAM offset addr and expand them\r
+ * to eight lane values via lshift_6bit8v_SSE2().\r
+ *\r
+ * Returns an all-zero vector when bit 0x40 of mpage is set.\r
+ *\r
+ * The source vector is now fully zeroed before the plane bytes are filled\r
+ * in (previously only half of it was initialised before being copied by\r
+ * value), and the unused local `ret` is gone.\r
+ */\r
+static v8hi_t gpixel2cbuf(Uint32 addr, Uint32 mpage)\r
+{\r
+    const Uint8 *vram_p = vram_pb;\r
+    v8hi_t v;\r
+\r
+    v.v = (v8si){0, 0, 0, 0, 0, 0, 0, 0};\r
+    if(mpage & 0x40) return v;\r
+\r
+    v.b[5] = vram_p[addr + 0x10000];\r
+    v.b[4] = vram_p[addr + 0x12000];\r
+    v.b[3] = vram_p[addr + 0x14000];\r
+    v.b[2] = vram_p[addr + 0x16000];\r
+    v.b[1] = vram_p[addr + 0x28000];\r
+    v.b[0] = vram_p[addr + 0x2a000];\r
+    return lshift_6bit8v_SSE2(v);\r
+}\r
+\r
+/*\r
+ * Fetch the six red bit-plane bytes for VRAM offset addr and expand them\r
+ * to eight lane values via lshift_6bit8v_SSE2().\r
+ *\r
+ * Returns an all-zero vector when bit 0x20 of mpage is set.\r
+ *\r
+ * The source vector is now fully zeroed before the plane bytes are filled\r
+ * in (previously only half of it was initialised before being copied by\r
+ * value), and the unused local `ret` is gone.\r
+ */\r
+static v8hi_t rpixel2cbuf(Uint32 addr, Uint32 mpage)\r
+{\r
+    const Uint8 *vram_p = vram_pb;\r
+    v8hi_t v;\r
+\r
+    v.v = (v8si){0, 0, 0, 0, 0, 0, 0, 0};\r
+    if(mpage & 0x20) return v;\r
+\r
+    v.b[5] = vram_p[addr + 0x08000];\r
+    v.b[4] = vram_p[addr + 0x0a000];\r
+    v.b[3] = vram_p[addr + 0x0c000];\r
+    v.b[2] = vram_p[addr + 0x0e000];\r
+    v.b[1] = vram_p[addr + 0x20000];\r
+    v.b[0] = vram_p[addr + 0x22000];\r
+    return lshift_6bit8v_SSE2(v);\r
+}\r
+\r
+/*\r
+ * Fetch the six blue bit-plane bytes for VRAM offset addr and expand them\r
+ * to eight lane values via lshift_6bit8v_SSE2().\r
+ *\r
+ * Returns an all-zero vector when bit 0x10 of mpage is set.\r
+ *\r
+ * The source vector is now fully zeroed before the plane bytes are filled\r
+ * in (previously only half of it was initialised before being copied by\r
+ * value), and the unused local `ret` is gone.\r
+ */\r
+static v8hi_t bpixel2cbuf(Uint32 addr, Uint32 mpage)\r
+{\r
+    const Uint8 *vram_p = vram_pb;\r
+    v8hi_t v;\r
+\r
+    v.v = (v8si){0, 0, 0, 0, 0, 0, 0, 0};\r
+    if(mpage & 0x10) return v;\r
+\r
+    v.b[5] = vram_p[addr + 0x00000];\r
+    v.b[4] = vram_p[addr + 0x02000];\r
+    v.b[3] = vram_p[addr + 0x04000];\r
+    v.b[2] = vram_p[addr + 0x06000];\r
+    v.b[1] = vram_p[addr + 0x18000];\r
+    v.b[0] = vram_p[addr + 0x1a000];\r
+    return lshift_6bit8v_SSE2(v);\r
+}\r
+\r
+\r
+\r
+\r
+/*\r
+ * Convert the VRAM byte column at addr into eight packed 32-bit pixels.\r
+ *\r
+ * Each of B, R and G is expanded separately (in that order), then the\r
+ * channels are merged with a constant 0xff alpha.\r
+ * Little-endian word layout: A<<24 | B<<16 | G<<8 | R.\r
+ *\r
+ * Previously the non-little-endian branch was empty and the function\r
+ * returned an uninitialised vector. It now packs R<<24 | G<<16 | B<<8 | A,\r
+ * which gives the same byte order in memory as the little-endian case.\r
+ * NOTE(review): the big-endian layout is inferred, not taken from a caller\r
+ * -- confirm against the texture format in use.\r
+ */\r
+static v8hi_t getvram_256k(Uint32 addr, Uint32 mpage)\r
+{\r
+    v8hi_t r, g, b;\r
+    v8hi_t a;\r
+    v8hi_t dst;\r
+\r
+    b = bpixel2cbuf(addr, mpage);\r
+    r = rpixel2cbuf(addr, mpage);\r
+    g = gpixel2cbuf(addr, mpage);\r
+#ifdef AG_LITTLE_ENDIAN\r
+    a.vv = (v8ii){0xff000000, 0xff000000, 0xff000000, 0xff000000, 0xff000000, 0xff000000, 0xff000000, 0xff000000};\r
+    dst.vv = (b.vv << 16 ) | (g.vv << 8) | r.vv | a.vv;\r
+#else\r
+    a.vv = (v8ii){0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};\r
+    dst.vv = (r.vv << 24) | (g.vv << 16) | (b.vv << 8) | a.vv;\r
+#endif\r
+    return dst;\r
+}\r
+\r
+\r
+\r
+/*\r
+ * Build one 8x8 pixel piece from VRAM. Note: VRAM is not locked here.\r
+ *\r
+ * p     : destination of the piece's top-left pixel.\r
+ * x, y  : source position; the VRAM offset is y * 40 + x and advances by\r
+ *         40 per row.\r
+ * pitch : distance between destination rows, in Uint32 pixels.\r
+ * mpage : plane mask forwarded to getvram_256k().\r
+ *\r
+ * Added guards, matching CreateVirtualVram256k_Line_SSE2: a NULL p is\r
+ * ignored, and when aPlanes has not been built the piece is filled with\r
+ * zero pixels instead of converting VRAM.\r
+ * NOTE(review): the aPlanes guard assumes lshift_6bit8v_SSE2 reads\r
+ * aPlanes, as the generic lshift_6bit8v does -- confirm.\r
+ */\r
+void CreateVirtualVram256k_1Pcs_SSE2(Uint32 *p, int x, int y, int pitch, int mpage)\r
+{\r
+    v8hi_t c;\r
+    Uint32 addr;\r
+    int row;\r
+\r
+    if(p == NULL) return;\r
+\r
+    if(aPlanes == NULL) {\r
+        c.v = (v8si){0,0,0,0,0,0,0,0};\r
+        for(row = 0; row < 8; row++) {\r
+            putword(p + row * pitch, c);\r
+        }\r
+        return;\r
+    }\r
+\r
+    addr = y * 40 + x;\r
+    for(row = 0; row < 8; row++) {\r
+        c = getvram_256k(addr, mpage);\r
+        putword(p + row * pitch, c);\r
+        addr += 40;\r
+    }\r
+}\r
+\r
+/*\r
+ * Convert rasters [ybegin, yend) of VRAM into 32-bit pixels.\r
+ *\r
+ * p is the start of the whole frame: raster yy is written at byte offset\r
+ * 32 * (yy * 40), i.e. 40 groups of 8 pixels per raster.\r
+ * When aPlanes has not been built the rasters are filled with zero pixels.\r
+ */\r
+void CreateVirtualVram256k_Line_SSE2(Uint32 *p, int ybegin, int yend, int mpage)\r
+{\r
+    const int pitch = sizeof(Uint32) * 8;\r
+    v8hi_t pix;\r
+    v8hi_t *dst;\r
+    Uint32 src;\r
+    int row;\r
+    int col;\r
+\r
+    if(aPlanes == NULL) {\r
+        pix.v = (v8si){0,0,0,0,0,0,0,0};\r
+        for(row = ybegin; row < yend; row++) {\r
+            src = row * 40;\r
+            dst = (v8hi_t *)((Uint8 *)p + (pitch * src));\r
+            for(col = 0; col < 40; col++) {\r
+                putword((Uint32 *)dst, pix);\r
+                dst++;\r
+            }\r
+        }\r
+        return;\r
+    }\r
+\r
+    for(row = ybegin; row < yend; row++) {\r
+        src = row * 40;\r
+        dst = (v8hi_t *)((Uint8 *)p + (pitch * src));\r
+        for(col = 0; col < 40; col++) {\r
+            pix = getvram_256k(src, mpage);\r
+            putword((Uint32 *)dst, pix);\r
+            dst++;\r
+            src++;\r
+        }\r
+    }\r
+}\r
+\r
+/*\r
+ * Windowed variant: xbegin and xend are ignored and the full rasters\r
+ * [ybegin, yend) are converted by CreateVirtualVram256k_Line_SSE2().\r
+ */\r
+void CreateVirtualVram256k_WindowedLine_SSE2(Uint32 *p, int ybegin, int yend, int xbegin, int xend, int mpage)\r
+{\r
+ CreateVirtualVram256k_Line_SSE2(p, ybegin, yend, mpage);\r
+}\r
+\r
+/* Function table for the SSE2 262144-colour converters: 8x8 piece, line, windowed line. */\r
+Api_Vram_FuncList api_vram256k_sse2 = {\r
+ CreateVirtualVram256k_1Pcs_SSE2,\r
+ CreateVirtualVram256k_Line_SSE2,\r
+ CreateVirtualVram256k_WindowedLine_SSE2\r
+};\r
--- /dev/null
+/*\r
+ * api_vram4096.cpp\r
+ * Convert VRAM -> VirtualVram\r
+ * (C) 2011 K.Ohta <whatisthis.sowhat@gmail.com>\r
+ */\r
+\r
+\r
+#include "api_draw.h"\r
+#include "api_vram.h"\r
+#include "cache_wrapper.h"\r
+\r
+//Uint8 *vram_pb;\r
+//Uint8 *vram_pr;\r
+//Uint8 *vram_pg;\r
+\r
+\r
+\r
+/*\r
+ * Translate the 8 colour indices in cbuf through rgbAnalogGDI and store\r
+ * the resulting pixels at disp.\r
+ */\r
+static inline void putword2_vec(Uint32 *disp, v8hi_t cbuf)\r
+{\r
+    v8hi_t *out = (v8hi_t *)disp;\r
+    int lane = 0;\r
+\r
+    _prefetch_data_write_l1(disp, sizeof(Uint32) * 8); // 4 * 8 = 32bytes.\r
+    while(lane < 8) {\r
+        out->i[lane] = rgbAnalogGDI[cbuf.i[lane]];\r
+        lane++;\r
+    }\r
+}\r
+\r
+/*\r
+ * Read the twelve bit-plane bytes (4 each for G, R, B) at VRAM offset addr\r
+ * and OR their aPlanes expansions into eight colour indices.\r
+ *\r
+ * Within each colour, plane 3 is at offset 0x0000 and plane 0 at 0x6000.\r
+ * aPlanes must be non-NULL.\r
+ */\r
+static inline v8hi_t getvram_4096_vec(Uint32 addr)\r
+{\r
+    const Uint8 *gsrc = &vram_pg[addr];\r
+    const Uint8 *rsrc = &vram_pr[addr];\r
+    const Uint8 *bsrc = &vram_pb[addr];\r
+    v8hi_t px;\r
+\r
+    px.v = aPlanes[G0 + gsrc[0x06000]] |\r
+           aPlanes[G1 + gsrc[0x04000]] |\r
+           aPlanes[G2 + gsrc[0x02000]] |\r
+           aPlanes[G3 + gsrc[0x00000]];\r
+\r
+    px.v = px.v |\r
+           aPlanes[R0 + rsrc[0x06000]] |\r
+           aPlanes[R1 + rsrc[0x04000]] |\r
+           aPlanes[R2 + rsrc[0x02000]] |\r
+           aPlanes[R3 + rsrc[0x00000]];\r
+\r
+    px.v = px.v |\r
+           aPlanes[B0 + bsrc[0x06000]] |\r
+           aPlanes[B1 + bsrc[0x04000]] |\r
+           aPlanes[B2 + bsrc[0x02000]] |\r
+           aPlanes[B3 + bsrc[0x00000]];\r
+    return px;\r
+}\r
+\r
+/*\r
+ * Build one 8x8 pixel piece from VRAM. Note: VRAM is not locked here.\r
+ *\r
+ * p     : destination of the piece's top-left pixel.\r
+ * x, y  : source position; the VRAM offset is y * 40 + x, plus 40 per row.\r
+ * pitch : distance between destination rows, in Uint32 pixels.\r
+ * mode  : not read by this routine.\r
+ *\r
+ * When aPlanes has not been built, colour index 0 is drawn for every pixel.\r
+ */\r
+void CreateVirtualVram4096_1Pcs_SSE2(Uint32 *p, int x, int y, int pitch, int mode)\r
+{\r
+    v8hi_t px;\r
+    Uint32 src = y * 40 + x;\r
+    int row;\r
+\r
+    if(aPlanes == NULL) {\r
+        px.v = (v8si){0,0,0,0,0,0,0,0};\r
+        for(row = 0; row < 8; row++) {\r
+            putword2_vec(p + row * pitch, px);\r
+        }\r
+        return;\r
+    }\r
+\r
+    for(row = 0; row < 8; row++) {\r
+        px = getvram_4096_vec(src + row * 40);\r
+        putword2_vec(p + row * pitch, px);\r
+    }\r
+}\r
+ \r
+\r
+/*\r
+ * 1LineのピースをVRAMから作成する:VramLockしない事に注意\r
+ */\r
+void CreateVirtualVram4096_Line_SSE2(Uint32 *p, int ybegin, int yend, int mode)\r
+{\r
+// Uint32 c[8];\r
+ register v8hi_t c;\r
+ Uint8 *disp;\r
+ Uint32 addr;\r
+ int yy;\r
+ int xx;\r
+ const int pitch = sizeof(Uint32) * 8;\r
+ int i;\r
+\r
+// for(i = 0; i < 4096; i++) __builtin_prefetch(&rgbAnalogGDI[i], 0, 0);\r
+ // Loop廃止(高速化)\r
+ if(aPlanes == NULL) {\r
+ c.v = (v8si){0,0,0,0,0,0,0,0};\r
+ for(yy = ybegin; yy < yend; yy++) {\r
+ addr = yy * 40;\r
+ disp = (Uint8 *)p + (pitch * addr);\r
+ for(xx = 0; xx < (40 / 8); xx++) {\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ }\r
+ }\r
+ } else {\r
+ for(yy = ybegin; yy < yend; yy++) {\r
+ addr = yy * 40;\r
+ disp = (Uint8 *)p + (pitch * addr);\r
+ for(xx = 0; xx < (40 / 8); xx++) {\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ addr++;\r
+ }\r
+ }\r
+ } \r
+}\r
+\r
+/*\r
+ * ybegin - yendの行を変換する\r
+ */\r
+void CreateVirtualVram4096_WindowedLine_SSE2(Uint32 *p, int ybegin, int yend, int xbegin, int xend, int mode)\r
+{\r
+#if (__GNUC__ >= 4) \r
+ register v8hi_t c;\r
+ Uint8 *disp =(Uint8 *) p;\r
+ Uint32 addr;\r
+ int pitch;\r
+ int xx;\r
+ int yy;\r
+ \r
+ if(p == NULL) return;\r
+ pitch = sizeof(Uint32) * 8;\r
+\r
+ // Loop廃止(高速化)\r
+ if(aPlanes == NULL) {\r
+ c.v = (v8si){0,0,0,0,0,0,0,0};\r
+ for(yy = ybegin; yy < yend; yy++) { \r
+ addr = yy * 40 + xbegin;\r
+ disp = (Uint8 *)(&p[yy * 320 + xbegin]);\r
+ for(xx = xbegin; xx < xend; xx ++) { \r
+ putword2_vec((Uint32 *)disp, c);\r
+ disp += pitch;\r
+ }\r
+ }\r
+ return;\r
+ } else {\r
+ int xs = (xend - xbegin) / 8;\r
+ int xs2 = (xend - xbegin) % 8;\r
+ int xx2;\r
+ for(yy = ybegin; yy < yend; yy++) { \r
+ addr = yy * 40 + xbegin;\r
+ disp = (Uint8 *)(&p[yy * 320 + xbegin]);\r
+ xx = xbegin;\r
+ for(xx2 = 0; xx2 < xs; xx2++) {\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ addr++;\r
+ disp += pitch;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ addr++;\r
+ disp += pitch;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ addr++;\r
+ disp += pitch;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ addr++;\r
+ disp += pitch;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ addr++;\r
+ disp += pitch;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ addr++;\r
+ disp += pitch;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ addr++;\r
+ disp += pitch;\r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ addr++;\r
+ disp += pitch;\r
+ \r
+ xx += 8;\r
+ }\r
+ if(xs2 <= 0) continue;\r
+ \r
+ for(;xx < xend; xx++) { \r
+ c = getvram_4096_vec(addr);\r
+ putword2_vec((Uint32 *)disp, c);\r
+ addr++;\r
+ disp += pitch;\r
+ }\r
+ }\r
+ return;\r
+ }\r
+ #else \r
+ Uint32 c[8];\r
+ Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+ Uint8 *disp =(Uint8 *) p;\r
+ int xx;\r
+ int yy;\r
+\r
+ if((p == NULL) || (pal == NULL)) return;\r
+ pitch = sizeof(Uint32) * 8;\r
+ for(yy = ybegin; yy < yend; yy++) { \r
+ addr = y * 40 + xbegin;\r
+ disp = (Uint8 *)(&p[yy * 320 + xbegin]);\r
+ for(xx = xbegin; xx < xend; xx++) {\r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp += pitch;\r
+ }\r
+ }\r
+#endif \r
+}\r
+\r
+/* Function table for the SSE2 4096-colour converters: 8x8 piece, line, windowed line. */\r
+Api_Vram_FuncList api_vram4096_sse2 = {\r
+ CreateVirtualVram4096_1Pcs_SSE2,\r
+ CreateVirtualVram4096_Line_SSE2,\r
+ CreateVirtualVram4096_WindowedLine_SSE2\r
+};\r
--- /dev/null
+/*\r
+ * api_vram8.cpp\r
+ * Convert VRAM -> VirtualVram\r
+ * (C) 2011 K.Ohta <whatisthis.sowhat@gmail.com>\r
+ */\r
+\r
+\r
+#include "api_draw.h"\r
+#include "api_vram.h"\r
+#include "sdl_cpuid.h"\r
+#include "cache_wrapper.h"\r
+\r
+extern void CreateVirtualVram8_WindowedLine(Uint32 *p, int ybegin, int yend, int xbegin, int xend, int mode);\r
+\r
+\r
+#if (__GNUC__ >= 4)\r
+\r
+/*\r
+ * Read one byte from each of the B, R and G planes at addr and merge their\r
+ * aPlanes expansions (B0, B1, B2 respectively) into eight colour indices.\r
+ * aPlanes must be non-NULL.\r
+ */\r
+static inline v8hi_t getvram_8_vec(Uint32 addr)\r
+{\r
+    v8hi_t px;\r
+\r
+    px.v = aPlanes[B0 + vram_pb[addr]] |\r
+           aPlanes[B1 + vram_pr[addr]] |\r
+           aPlanes[B2 + vram_pg[addr]];\r
+    return px;\r
+}\r
+\r
+/*\r
+ * Translate the 8 colour indices in c (each masked to 0..7) through the\r
+ * palette pal and store the 8 resulting pixels at disp.\r
+ * Does nothing when pal or disp is NULL.\r
+ *\r
+ * x86-64 builds use SSE2 inline assembly; other targets use the C loop.\r
+ * Both produce pal[c lane k] at disp[k].\r
+ *\r
+ * Fix: the asm reads c and pal and writes disp through pointer inputs, so\r
+ * it now declares a "memory" clobber; without it the compiler may reorder\r
+ * or cache memory accesses around the statement.\r
+ */\r
+static void putword8_vec(Uint32 *disp, v8hi_t c, Uint32 *pal)\r
+{\r
+ v8hi_t *p = (v8hi_t *)disp;\r
+ register int j;\r
+\r
+#ifdef __x86_64__\r
+ if((pal == NULL) || (disp == NULL))return;\r
+ /* xmm0/xmm1 = low/high four indices, masked with 7 and lane-reversed so\r
+  * that the shift-and-OR sequence below rebuilds them in original order. */\r
+ asm ("movq %[c], %%r8\n\t"\r
+ "movdqa 0(%%r8), %%xmm0\n\t"\r
+ "movdqa 16(%%r8), %%xmm1\n\t"\r
+ "movq %[pal], %%r8\n\t"\r
+ "movq %[disp], %%rdi\n\t"\r
+ "movl $7, %%r9d\n\t"\r
+ "movd %%r9d, %%xmm2\n\t"\r
+ "pshufd $0b00000000, %%xmm2, %%xmm2\n\t"\r
+ "pand %%xmm2, %%xmm0\n\t"\r
+ "pand %%xmm2, %%xmm1\n\t"\r
+ "pshufd $0b00011011, %%xmm0, %%xmm0\n\t"\r
+ "pshufd $0b00011011, %%xmm1, %%xmm1\n\t"\r
+ \r
+ "movd %%xmm0, %%r9d\n\t"\r
+ "movd 0(%%r8, %%r9, 4), %%xmm2\n\t"\r
+ "psrldq $4, %%xmm0\n\t"\r
+ "movdqa %%xmm2, %%xmm3\n\t"\r
+ "pslldq $4, %%xmm3\n\t"\r
+ \r
+ "movd %%xmm0, %%r9d\n\t"\r
+ "movd 0(%%r8, %%r9, 4), %%xmm2\n\t"\r
+ "psrldq $4, %%xmm0\n\t"\r
+ "por %%xmm2, %%xmm3\n\t"\r
+ "pslldq $4, %%xmm3\n\t"\r
+\r
+ "movd %%xmm0, %%r9d\n\t"\r
+ "movd 0(%%r8, %%r9, 4), %%xmm2\n\t"\r
+ "psrldq $4, %%xmm0\n\t"\r
+ "por %%xmm2, %%xmm3\n\t"\r
+ "pslldq $4, %%xmm3\n\t"\r
+\r
+ "movd %%xmm0, %%r9d\n\t"\r
+ "movd 0(%%r8, %%r9, 4), %%xmm2\n\t"\r
+ "/* psrldq $4, %%xmm0 */\n\t"\r
+ "por %%xmm2, %%xmm3\n\t"\r
+ "/* pslldq $4, %%xmm3 */\n\t"\r
+ "movdqu %%xmm3, 0(%%rdi)\n\t"\r
+ \r
+ "movd %%xmm1, %%r10d\n\t"\r
+ "movd 0(%%r8, %%r10, 4), %%xmm4\n\t"\r
+ "psrldq $4, %%xmm1\n\t"\r
+ "movdqa %%xmm4, %%xmm5\n\t"\r
+ "pslldq $4, %%xmm5\n\t"\r
+ \r
+ "movd %%xmm1, %%r10d\n\t"\r
+ "movd 0(%%r8, %%r10, 4), %%xmm4\n\t"\r
+ "psrldq $4, %%xmm1\n\t"\r
+ "por %%xmm4, %%xmm5\n\t"\r
+ "pslldq $4, %%xmm5\n\t"\r
+\r
+ "movd %%xmm1, %%r10d\n\t"\r
+ "movd 0(%%r8, %%r10, 4), %%xmm4\n\t"\r
+ "psrldq $4, %%xmm1\n\t"\r
+ "por %%xmm4, %%xmm5\n\t"\r
+ "pslldq $4, %%xmm5\n\t"\r
+\r
+ "movd %%xmm1, %%r10d\n\t"\r
+ "movd 0(%%r8, %%r10, 4), %%xmm4\n\t"\r
+ "/* psrldq $4, %%xmm1 */\n\t"\r
+ "por %%xmm4, %%xmm5\n\t"\r
+ "/* pslldq $4, %%xmm5 */\n\t"\r
+ "movdqu %%xmm5, 16(%%rdi)\n\t"\r
+ :\r
+ : [c] "rm" (&c), [disp] "rm" (disp), [pal] "rm" (pal)\r
+ : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",\r
+ "r8", "r9", "r10", "rdi", "memory");\r
+#else\r
+ v8hi_t tmp;\r
+ if((pal == NULL) || (disp == NULL))return;\r
+ c.vv &= (v8ii){7, 7, 7, 7, 7, 7, 7, 7,};\r
+ for(j = 0; j < 8; j++) {\r
+ tmp.i[j] = pal[c.i[j]];\r
+ }\r
+ *p = tmp;\r
+#endif \r
+}\r
+\r
+\r
+/*\r
+ * Fused fetch + store: read one byte from each of the B, R and G planes at\r
+ * addr, expand them through aPlanes, mask the 8 resulting indices to 0..7,\r
+ * translate them through pal and store the 8 pixels at disp.\r
+ * Does nothing when pal or disp is NULL. aPlanes must be non-NULL.\r
+ *\r
+ * The asm hard-codes the table layout: 32 bytes per entry, R entries at\r
+ * byte offset 0x2000 and G entries at 0x4000.\r
+ * NOTE(review): this presumes B0 == 0, B1 == 256, B2 == 512 and\r
+ * sizeof(v8si) == 32 -- confirm against the table definitions.\r
+ *\r
+ * Fixes: the asm overwrites %eax (movl $0x07, %eax) but did not list it as\r
+ * clobbered, so the compiler could keep a live value there; "rax" is now\r
+ * declared. A "memory" clobber is added because the asm reads VRAM, the\r
+ * table and the palette and writes disp through pointer inputs.\r
+ */\r
+static void getputvram_8_vec(Uint32 addr, Uint32 *disp, Uint32 *pal)\r
+{\r
+#ifdef __x86_64__\r
+ if((pal == NULL) || (disp == NULL)) return;\r
+ asm (\r
+ "movq %[vram_pg], %%r9\n\t"\r
+ "movq %[vram_pr], %%r10\n\t"\r
+ "movq %[vram_pb], %%r11\n\t"\r
+ \r
+ "movb 0(%%r11), %%r13b\n\t"\r
+ "movb 0(%%r10), %%r14b\n\t"\r
+ "movb 0(%%r9), %%r15b\n\t"\r
+ "andq $0xff, %%r13\n\t"\r
+ "andq $0xff, %%r14\n\t"\r
+ "andq $0xff, %%r15\n\t"\r
+ "shlq $5, %%r13\n\t"\r
+ "shlq $5, %%r14\n\t"\r
+ "shlq $5, %%r15\n\t"\r
+ "addq $0x2000, %%r14 /* 256 * 32 */\n\t"\r
+ "addq $0x4000, %%r15 /* 512 * 32 */\n\t"\r
+ \r
+ "movq %[pal], %%r8\n\t"\r
+ "movq %[disp], %%rdi\n\t"\r
+ "movq %[aPlanes], %%r12\n\t"\r
+ \r
+ "movdqa 0(%%r12, %%r13), %%xmm0\n\t"\r
+ "movdqa 0(%%r12, %%r14), %%xmm1\n\t"\r
+ "movdqa 0(%%r12, %%r15), %%xmm2\n\t"\r
+ "por %%xmm1, %%xmm0\n\t"\r
+ "por %%xmm2, %%xmm0\n\t"\r
+\r
+ "movdqa 16(%%r12, %%r13), %%xmm1\n\t"\r
+ "movdqa 16(%%r12, %%r14), %%xmm4\n\t"\r
+ "movdqa 16(%%r12, %%r15), %%xmm5\n\t"\r
+ "por %%xmm4, %%xmm1\n\t"\r
+ "por %%xmm5, %%xmm1\n\t"\r
+ \r
+ "movl $0x07, %%eax\n\t"\r
+ "movd %%eax, %%xmm2\n\t"\r
+ "pshufd $0b00000000, %%xmm2, %%xmm2\n\t"\r
+ "pand %%xmm2, %%xmm0\n\t"\r
+ "pand %%xmm2, %%xmm1\n\t"\r
+ "pshufd $0b00011011, %%xmm0, %%xmm0\n\t"\r
+ "pshufd $0b00011011, %%xmm1, %%xmm1\n\t"\r
+ "pxor %%xmm2, %%xmm2\n\t"\r
+ "pxor %%xmm4, %%xmm4\n\t"\r
+\r
+ "movd %%xmm0, %%r9d\n\t"\r
+ "movd 0(%%r8, %%r9, 4), %%xmm2\n\t"\r
+ "psrldq $4, %%xmm0\n\t"\r
+ "movdqa %%xmm2, %%xmm3\n\t"\r
+ "pslldq $4, %%xmm3\n\t"\r
+ \r
+ "movd %%xmm0, %%r9d\n\t"\r
+ "movd 0(%%r8, %%r9, 4), %%xmm2\n\t"\r
+ "psrldq $4, %%xmm0\n\t"\r
+ "por %%xmm2, %%xmm3\n\t"\r
+ "pslldq $4, %%xmm3\n\t"\r
+\r
+ "movd %%xmm0, %%r9d\n\t"\r
+ "movd 0(%%r8, %%r9, 4), %%xmm2\n\t"\r
+ "psrldq $4, %%xmm0\n\t"\r
+ "por %%xmm2, %%xmm3\n\t"\r
+ "pslldq $4, %%xmm3\n\t"\r
+\r
+ "movd %%xmm0, %%r9d\n\t"\r
+ "movd 0(%%r8, %%r9, 4), %%xmm2\n\t"\r
+ "por %%xmm2, %%xmm3\n\t"\r
+ "movdqu %%xmm3, 0(%%rdi)\n\t"\r
+ \r
+ "movd %%xmm1, %%r10d\n\t"\r
+ "movd 0(%%r8, %%r10, 4), %%xmm4\n\t"\r
+ "psrldq $4, %%xmm1\n\t"\r
+ "movdqa %%xmm4, %%xmm5\n\t"\r
+ "pslldq $4, %%xmm5\n\t"\r
+ \r
+ "movd %%xmm1, %%r10d\n\t"\r
+ "movd 0(%%r8, %%r10, 4), %%xmm4\n\t"\r
+ "psrldq $4, %%xmm1\n\t"\r
+ "por %%xmm4, %%xmm5\n\t"\r
+ "pslldq $4, %%xmm5\n\t"\r
+\r
+ "movd %%xmm1, %%r10d\n\t"\r
+ "movd 0(%%r8, %%r10, 4), %%xmm4\n\t"\r
+ "psrldq $4, %%xmm1\n\t"\r
+ "por %%xmm4, %%xmm5\n\t"\r
+ "pslldq $4, %%xmm5\n\t"\r
+\r
+ "movd %%xmm1, %%r10d\n\t"\r
+ "movd 0(%%r8, %%r10, 4), %%xmm4\n\t"\r
+ "por %%xmm4, %%xmm5\n\t"\r
+ "movdqu %%xmm5, 16(%%rdi)\n\t"\r
+ :\r
+ : [aPlanes] "rm" (aPlanes),\r
+ [disp] "rm" (disp), [pal] "rm" (pal), \r
+ [vram_pg] "rm" (&vram_pg[addr]), [vram_pr] "rm" (&vram_pr[addr]), [vram_pb] "rm" (&vram_pb[addr])\r
+ : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",\r
+ "rax", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",\r
+ "rdi", "memory");\r
+#else\r
+ v8hi_t c;\r
+ register uint8_t g, r, b;\r
+ int j;\r
+ v8hi_t *p = (v8hi_t *)disp;\r
+ v8hi_t tmp;\r
+ \r
+ if((pal == NULL) || (p == NULL))return;\r
+ \r
+ g = vram_pg[addr];\r
+ r = vram_pr[addr];\r
+ b = vram_pb[addr];\r
+\r
+ c.v = aPlanes[B0 + b] |\r
+ aPlanes[B1 + r] |\r
+ aPlanes[B2 + g];\r
+ c.vv &= (v8ii){7, 7, 7, 7, 7, 7, 7, 7,};\r
+ for(j = 0; j < 8; j++) {\r
+ tmp.i[j] = pal[c.i[j]];\r
+ }\r
+ *p = tmp;\r
+#endif\r
+}\r
+\r
+\r
+\r
+\r
+#else\r
+/*\r
+ * Expand one byte from each of the R, G and B planes into 8 colour\r
+ * indices. c[0] comes from bit 7 of each byte and c[7] from bit 0; within\r
+ * an index, G is bit 2, R is bit 1 and B is bit 0.\r
+ *\r
+ * Fixes: the parameter type was misspelled (unit8_t), and the blue\r
+ * component was combined with logical || instead of bitwise |, which\r
+ * collapsed every index to 0 or 1.\r
+ */\r
+static inline void planeto8(Uint32 *c, uint8_t r, uint8_t g, uint8_t b)\r
+{\r
+    int bit;\r
+\r
+    for(bit = 7; bit >= 0; bit--) {\r
+        c[7 - bit] = (((g >> bit) & 1) << 2) |\r
+                     (((r >> bit) & 1) << 1) |\r
+                      ((b >> bit) & 1);\r
+    }\r
+}\r
+\r
+/*\r
+ * Read one byte from each of the R, G and B planes at addr and expand\r
+ * them into the 8 colour indices cbuf[0..7] via planeto8().\r
+ */\r
+static void getvram_8(Uint32 addr, Uint32 *cbuf)\r
+{\r
+    planeto8(cbuf, vram_pr[addr], vram_pg[addr], vram_pb[addr]);\r
+}\r
+\r
+/*\r
+ * Translate the 8 colour indices c[0..7] (each masked to 0..7) through\r
+ * the palette pal and store the resulting pixels at disp[0..7].\r
+ */\r
+static inline void putword8(Uint32 *disp, Uint32 *c, Uint32 *pal)\r
+{\r
+    int k;\r
+\r
+    for(k = 0; k < 8; k++) {\r
+        disp[k] = pal[c[k] & 7];\r
+    }\r
+}\r
+\r
+#endif // __GNUC__ >= 4\r
+\r
+\r
+\r
+/*\r
+ * 8x8のピースをVRAMから作成する:VramLockしない事に注意\r
+ */\r
+void CreateVirtualVram8_1Pcs_SSE2(Uint32 *p, int x, int y, int pitch, int mode)\r
+{\r
+#if (__GNUC__ >= 4) \r
+ register v8hi_t c;\r
+ Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+ register v8hi_t *disp =(v8hi_t *) p;\r
+ register Uint32 addr;\r
+ register int i;\r
+ pitch = pitch / (sizeof(v8hi_t) / sizeof(Uint32));\r
+\r
+ if((p == NULL) || (pal == NULL)) return;\r
+// for(i = 0; i < 8; i++) __builtin_prefetch(&pal[i], 0, 0); // パレットテーブルをキャッシュに読み込ませておく\r
+ addr = y * 80 + x;\r
+ // Loop廃止(高速化)\r
+ if(aPlanes == NULL) {\r
+ c.v = (v8si){0,0,0,0,0,0,0,0};\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp += pitch;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+// disp++;\r
+ return;\r
+ } else {\r
+#if 0\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+#else\r
+ getputvram_8_vec(addr, disp, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+\r
+ getputvram_8_vec(addr, disp, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getputvram_8_vec(addr, disp, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getputvram_8_vec(addr, disp, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getputvram_8_vec(addr, disp, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getputvram_8_vec(addr, disp, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getputvram_8_vec(addr, disp, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getputvram_8_vec(addr, disp, pal);\r
+// addr += 80;\r
+// disp += pitch;\r
+#endif\r
+ }\r
+#else \r
+ Uint32 c[8];\r
+ Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+ v8hi_t *disp =(V8hi_t *) p;\r
+\r
+ if((p == NULL) || (pal == NULL)) return;\r
+ pitch = sizeof(Uint32) * 8;\r
+ addr = y * 80 + x;\r
+\r
+ // Loop廃止(高速化)\r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr , c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr , c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 80;\r
+ disp += pitch;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ // addr += 80;\r
+ // disp++;\r
+ \r
+#endif \r
+}\r
+\r
+/*\r
+ * ybegin - yendの行を変換する\r
+ */\r
+void CreateVirtualVram8_Line_SSE2(Uint32 *p, int ybegin, int yend, int mode)\r
+{\r
+#if (__GNUC__ >= 4) \r
+ register v8hi_t c;\r
+ Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+ register v8hi_t *disp =(v8hi_t *) p;\r
+ register Uint32 addr;\r
+ const int pitch = sizeof(Uint32) * 8;\r
+ int xx;\r
+ int yy;\r
+ register int i;\r
+ \r
+ if((p == NULL) || (pal == NULL)) return;\r
+\r
+// for(i = 0; i < 8; i++) __builtin_prefetch(&pal[i], 0, 0); // パレットテーブルをキャッシュに読み込ませておく\r
+ // Loop廃止(高速化)\r
+ if(aPlanes == NULL) {\r
+ c.v = (v8si){0,0,0,0,0,0,0,0};\r
+ for(yy = ybegin; yy < yend; yy++) { \r
+ addr = ybegin * 80;\r
+ for(xx = 0; xx < (80 / 8); xx ++) { \r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp++;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp++;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp++;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp++;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp++;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp++;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp++;\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp++;\r
+ }\r
+ }\r
+ return;\r
+ } else {\r
+ for(yy = ybegin; yy < yend; yy++) { \r
+ addr = yy * 80;\r
+ for(xx = 0; xx < (80 / 8); xx++) { \r
+#if 1\r
+ getputvram_8_vec(addr, (Uint32 *)disp, pal);\r
+ addr++;\r
+ disp++;\r
+\r
+ getputvram_8_vec(addr, (Uint32 *)disp, pal);\r
+ addr++;\r
+ disp++;\r
+\r
+ getputvram_8_vec(addr, (Uint32 *)disp, pal);\r
+ addr++;\r
+ disp++;\r
+\r
+ getputvram_8_vec(addr, (Uint32 *)disp, pal);\r
+ addr++;\r
+ disp++;\r
+\r
+ getputvram_8_vec(addr, (Uint32 *)disp, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ getputvram_8_vec(addr, (Uint32 *)disp, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ getputvram_8_vec(addr, (Uint32 *)disp, pal);\r
+ addr++;\r
+ disp++;\r
+\r
+ getputvram_8_vec(addr, (Uint32 *)disp, pal);\r
+ addr++;\r
+ disp++;\r
+#else\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+#endif\r
+ }\r
+ \r
+ }\r
+ return;\r
+ }\r
+ #else \r
+ Uint32 c[8];\r
+ Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+ Uint8 *disp =(Uint8 *) p;\r
+ int xx;\r
+ int yy;\r
+\r
+ if((p == NULL) || (pal == NULL)) return;\r
+ pitch = sizeof(Uint32) * 8;\r
+ for(yy = ybegin; yy < yend; yy++) { \r
+ addr = y * 80;\r
+ for(xx = 0; xx < (80 / 8) ; xx++) {\r
+ \r
+ // Loop廃止(高速化)\r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ getvram_8(addr , c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 1;\r
+ disp++;\r
+ \r
+ getvram_8(addr , c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 1;\r
+ disp++;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 1;\r
+ disp++;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 1;\r
+ disp++;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 1;\r
+ disp++;\r
+ \r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr += 1;\r
+ disp++;\r
+ }\r
+ }\r
+ \r
+ \r
+#endif \r
+}\r
+/*\r
+ * ybegin - yendの行を変換する\r
+ */\r
+void CreateVirtualVram8_WindowedLine_SSE2(Uint32 *p, int ybegin, int yend, int xbegin, int xend, int mode)\r
+{\r
+#if (__GNUC__ >= 4) \r
+ register v8hi_t c;\r
+ Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+ Uint8 *disp =(Uint8 *) p;\r
+ Uint32 addr;\r
+ int pitch;\r
+ int xx;\r
+ int yy;\r
+ \r
+ if((p == NULL) || (pal == NULL)) return;\r
+ pitch = sizeof(Uint32) * 8;\r
+\r
+ // Loop廃止(高速化)\r
+ if(aPlanes == NULL) {\r
+ c.vv = (v8ii){0,0,0,0,0,0,0,0};\r
+ for(yy = ybegin; yy < yend; yy++) { \r
+ addr = yy * 80 + xbegin;\r
+ disp = (Uint8 *)(&p[yy * 640 + xbegin]);\r
+ for(xx = xbegin; xx < xend; xx ++) { \r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ disp++;\r
+ }\r
+ }\r
+ return;\r
+ } else {\r
+ int xs = (xend - xbegin) / 8;\r
+ int xs2 = (xend - xbegin) % 8;\r
+ int xx2;\r
+ for(yy = ybegin; yy < yend; yy++) { \r
+ addr = yy * 80 + xbegin;\r
+ disp = (Uint8 *)(&p[yy * 640 + xbegin]);\r
+ xx = xbegin;\r
+ for(xx2 = 0; xx2 < xs; xx2++) {\r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ xx += 8;\r
+ }\r
+ if(xs2 <= 0) continue;\r
+ for(; xx < xend; xx++) { \r
+ c = getvram_8_vec(addr);\r
+ putword8_vec((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ }\r
+ }\r
+ return;\r
+ }\r
+ #else \r
+ Uint32 c[8];\r
+ Uint32 *pal = (Uint32 *)rgbTTLGDI;\r
+ Uint8 *disp =(Uint8 *) p;\r
+ int xx;\r
+ int yy;\r
+\r
+ if((p == NULL) || (pal == NULL)) return;\r
+ pitch = sizeof(Uint32) * 8;\r
+ for(yy = ybegin; yy < yend; yy++) { \r
+ addr = y * 80 + xbegin;\r
+ disp = (Uint8 *)(&p[yy * 640 + xbegin]);\r
+ for(xx = xbegin; xx < xend; xx++) {\r
+ getvram_8(addr, c);\r
+ putword8((Uint32 *)disp, c, pal);\r
+ addr++;\r
+ disp++;\r
+ }\r
+ }\r
+#endif \r
+}\r
+\r
+Api_Vram_FuncList api_vram8_sse2 = { /* SSE2 function table for the 8-colour VRAM converters: 1Pcs, Line, WindowedLine, in that order */\r
+ CreateVirtualVram8_1Pcs_SSE2,\r
+ CreateVirtualVram8_Line_SSE2,\r
+ CreateVirtualVram8_WindowedLine /* NOTE(review): non-SSE2 name, although CreateVirtualVram8_WindowedLine_SSE2 is defined in this file - confirm this is intentional */\r
+};\r
--- /dev/null
+/*\r
+ * api_vramvec.cpp\r
+ * Convert VRAM -> VirtualVram(Vector Version)\r
+ * (C) 2012 K.Ohta <whatisthis.sowhat@gmail.com>\r
+ */\r
+\r
+\r
+#include "xm7_types.h"\r
+#include "api_draw.h"\r
+#include "api_vram.h"\r
+\r
+/*\r
+* Definition of Conversion Tables.\r
+*/\r
+// Reduce Tables 20120131\r
+\r
+extern v8si *aPlanes;\r
+\r
+\r
+/*\r
+ * Build one colour vector from six plane bytes.\r
+ *\r
+ * v.b[0] .. v.b[5] index the aPlanes lookup table at the offsets B2, B3,\r
+ * R0, R1, R2 and R3; the six table entries are OR-ed together. Every lane\r
+ * of the result that has any of the bits 0xf8 set additionally gets its\r
+ * low two bits (0x03) set.\r
+ *\r
+ * The GCC version test previously used __GCC_MINOR__, which is not a\r
+ * predefined macro (it evaluated as 0), so GCC 4.7 - 4.9 never used the\r
+ * vector-compare path. The correct macro is __GNUC_MINOR__.\r
+ */\r
+v8hi_t lshift_6bit8v_SSE2(v8hi_t v)\r
+{\r
+    v8hi_t cbuf;\r
+    v8hi_t mask;\r
+\r
+    mask.v = (v8si){0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8};\r
+\r
+    cbuf.v =\r
+        aPlanes[B2 + v.b[0]] |\r
+        aPlanes[B3 + v.b[1]] |\r
+        aPlanes[R0 + v.b[2]] |\r
+        aPlanes[R1 + v.b[3]] |\r
+        aPlanes[R2 + v.b[4]] |\r
+        aPlanes[R3 + v.b[5]];\r
+\r
+    mask.v = mask.v & cbuf.v;\r
+#if ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7)) || (__GNUC__ > 4) //GCC 4.7 or later.\r
+    {\r
+        // Vector comparison yields all-ones lanes where mask is non-zero.\r
+        v8hi_t r;\r
+        r.v = mask.v != (v8si){0, 0, 0, 0, 0, 0, 0, 0};\r
+        r.v = r.v & (v8si) {0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03};\r
+        cbuf.v = cbuf.v | r.v;\r
+    }\r
+#else\r
+    {\r
+        // Per-lane fallback for compilers without vector comparisons.\r
+        int i;\r
+        for(i = 0; i < 8; i++) {\r
+            if(mask.s[i] != 0) cbuf.s[i] |= 0x03;\r
+        }\r
+    }\r
+#endif\r
+    return cbuf;\r
+}\r
+\r
+\r
+\r
+\r
--- /dev/null
+# Convert a text file into a single C string constant:
+#     const char* <VARNAME> = "<line1>\n<line2>\n...";
+# VARNAME must be supplied by the caller (for example with -v VARNAME=...).
+
+# Escape one input line for use inside a C string literal.
+# Both the double quote and the backslash are prefixed with a backslash;
+# previously only the quote was escaped, so a backslash in the input was
+# interpreted as the start of an escape sequence by the C compiler.
+function c_escape(s,    out, i, ch) {
+    out = "";
+    for (i = 1; i <= length(s); i++) {
+        ch = substr(s, i, 1);
+        if (ch == "\\" || ch == "\"") out = out "\\";
+        out = out ch;
+    }
+    return out;
+}
+
+BEGIN {
+    printf "const char* %s = \"", VARNAME;
+}
+
+# Every input line is emitted followed by a literal \n escape.
+{
+    printf "%s\\n", c_escape($0);
+}
+
+END {
+    printf "\";\n"
+}
\ No newline at end of file
#ifndef _COMMON_H_\r
#define _COMMON_H_\r
\r
+#if defined(_USE_AGAR) || defined(_USE_SDL)\r
+#include <SDL.h>\r
+\r
+# ifndef uint8 /* Fixed-width and Win32-style type aliases for Agar/SDL builds. Note: #ifndef only tests macros, so these guards do not detect an existing typedef of the same name. */\r
+ typedef uint8_t uint8;\r
+# endif\r
+# ifndef int8\r
+ typedef int8_t int8;\r
+# endif\r
+# ifndef uint16\r
+ typedef uint16_t uint16;\r
+# endif\r
+# ifndef int16\r
+ typedef int16_t int16;\r
+# endif\r
+# ifndef uint32\r
+ typedef uint32_t uint32;\r
+# endif\r
+# ifndef int32\r
+ typedef int32_t int32;\r
+# endif\r
+# ifndef uint64\r
+ typedef uint64_t uint64;\r
+# endif\r
+# ifndef int64\r
+ typedef int64_t int64;\r
+# endif\r
+# ifndef BOOL\r
+ typedef int BOOL;\r
+# endif\r
+# ifndef BYTE\r
+ typedef uint8_t BYTE;\r
+# endif\r
+# ifndef WORD\r
+ typedef uint16_t WORD;\r
+# endif\r
+# ifndef DWORD\r
+ typedef uint32_t DWORD;\r
+# endif\r
+# ifndef QWORD\r
+ typedef uint64_t QWORD;\r
+# endif\r
+\r
+\r
+\r
+// tchar.h replacement: _T()/_TEXT() literal macros and the _TCHAR character type (wide variants when _UNICODE is defined)\r
+# ifdef _UNICODE\r
+# define __T(x) L ## x\r
+# else\r
+# define __T(x) x\r
+# endif\r
+ \r
+# define _T(x) __T(x)\r
+# define _TEXT(x) __T(x)\r
+\r
+# ifdef _UNICODE\r
+ typedef wchar_t _TCHAR;\r
+# else\r
+ typedef char _TCHAR;\r
+# endif\r
+\r
+# ifndef LPCTSTR\r
+ typedef const _TCHAR *LPCTSTR; /* pointer to a constant _TCHAR string, as in the Win32 API (was a single _TCHAR) */\r
+# endif\r
+\r
+# ifdef _USE_GETTEXT /* _N(x): x passed through gettext() when _USE_GETTEXT is defined, otherwise the same as _T(x) */\r
+# include <libintl.h>\r
+# define _N(x) gettext(x)\r
+# else\r
+# define _N(x) _T(x)\r
+# endif\r
+\r
+/* Convert a 32-bit value between host byte order and little-endian. */\r
+static inline DWORD EndianToLittle_DWORD(DWORD x)\r
+{\r
+#if (SDL_BYTEORDER == SDL_LIL_ENDIAN)\r
+    /* Host is already little-endian: nothing to do. */\r
+    return x;\r
+#else // BIG_ENDIAN\r
+    /* Reverse the four bytes. */\r
+    return (x << 24) | ((x << 8) & 0x00ff0000) |\r
+           ((x >> 8) & 0x0000ff00) | (x >> 24);\r
+#endif\r
+}\r
+\r
+/* Convert a 16-bit value between host byte order and little-endian. */\r
+static inline WORD EndianToLittle_WORD(WORD x)\r
+{\r
+#if (SDL_BYTEORDER == SDL_LIL_ENDIAN)\r
+    /* Host is already little-endian: nothing to do. */\r
+    return x;\r
+#else // BIG_ENDIAN\r
+    /* Swap the two bytes; the result is truncated back to 16 bits. */\r
+    return (WORD)((x << 8) | (x >> 8));\r
+#endif\r
+}\r
+#define ZeroMemory(p,s) memset((p),0x00,(s)) /* Win32 ZeroMemory replacement: fill s bytes at p with zero */\r
+#define CopyMemory(t,f,s) memcpy((t),(f),(s)) /* Win32 CopyMemory replacement: copy s bytes from f to t (was memcopy(), which is not a C library function) */\r
+\r
+extern "C" /* Win32-named helpers declared with C linkage; no definition is visible in this header */ \r
+{\r
+extern void Sleep(int tick); /* NOTE(review): presumably waits `tick` milliseconds like Win32 Sleep() - confirm against the implementation */\r
+extern uint32_t timeGetTime(void); /* NOTE(review): presumably a millisecond tick counter like Win32 timeGetTime() - confirm */\r
+}\r
+\r
+\r
+#else\r
#include <tchar.h>\r
\r
// variable scope of 'for' loop for microsoft visual c++ 6.0 and embedded visual c++ 4.0\r
#pragma warning( disable : 4995 )\r
#pragma warning( disable : 4996 )\r
#endif\r
+#endif\r
\r
// type definition\r
#ifndef uint8\r
#else\r
typedef signed long long int64;\r
#endif\r
+\r
+static inline DWORD EndianToLittle_DWORD(DWORD x) /* returns x unchanged. NOTE(review): the enclosing conditional is not visible here; presumably this is the little-endian Windows path - confirm it is never compiled together with the SDL definitions earlier in this header */\r
+{\r
+ return x;\r
+}\r
+\r
+static inline WORD EndianToLittle_WORD(WORD x) /* returns x unchanged (same assumption as EndianToLittle_DWORD) */\r
+{\r
+ return x;\r
+}\r
+\r
+\r
#endif\r
\r
+\r
typedef union {\r
#ifdef _BIG_ENDIAN\r
struct {\r
\r
[ config ]\r
*/\r
-\r
+#if defined(_USE_AGAR) || defined(_USE_SDL)\r
+#include <SDL.h>\r
+#include <agar/core.h>\r
+#else\r
#include <windows.h>\r
+#endif\r
+\r
#include <stdlib.h>\r
#include <stdio.h>\r
#include "config.h"\r
#include "fileio.h"\r
\r
config_t config;\r
-\r
BOOL WritePrivateProfileInt(LPCTSTR lpAppName, LPCTSTR lpKeyName, int Value, LPCTSTR lpFileName)\r
{\r
_TCHAR String[32];\r
return (GetPrivateProfileInt(lpAppName, lpKeyName, bDefault ? 1 : 0, lpFileName) != 0);\r
}\r
\r
+\r
void init_config()\r
{\r
// initial settings\r
// output i/o debug log\r
// #define _IO_DEBUG_LOG\r
#endif\r
+\r
#if defined(_USE_AGAR) || defined(_USE_SDL)\r
# include <SDL.h>\r
# include <agar/core.h>\r
\r
// Wrapper of WIN32->*nix\r
\r
-// tchar.h\r
-# ifdef _UNICODE\r
-# define __T(x) L ## x\r
-# else\r
-# define __T(x) x\r
-# endif\r
- \r
-# define _T(x) __T(x)\r
-# define _TEXT(x) __T(x)\r
-\r
-# ifdef _UNICODE\r
- typedef wchar_t _TCHAR;\r
-# else\r
- typedef char _TCHAR;\r
-# endif\r
- typedef int bool;\r
- typedef bool BOOL;\r
-\r
-# ifdef _USE_GETTEXT\r
-# include <libintl.h>\r
-# define _N(x) gettext(x)\r
-# else\r
-# define _N(x) _T(x)\r
-# endif\r
+\r
\r
#else // _USE_WIN32\r
#include <windows.h>\r
#include <windowsx.h>\r
#include <mmsystem.h>\r
#include <process.h>\r
+\r
#endif // _USE_WIN32\r
\r
#include <stdio.h>\r
--- /dev/null
+cmake_minimum_required (VERSION 2.6)
+
+message("* vm") # progress marker printed while CMake processes this directory
+
+add_library(vm_vm # library built from the sources listed below; no STATIC/SHARED keyword, so the type follows CMake's default
+ 315-5124.cpp
+ and.cpp
+ beep.cpp
+ datarec.cpp
+ disk.cpp
+ event.cpp
+ hd146818p.cpp
+ hd46505.cpp
+ hd63484.cpp
+ huc6280.cpp
+ i286.cpp
+ i386.cpp
+ i8080.cpp
+ i8155.cpp
+ i8237.cpp
+ i8251.cpp
+ i8253.cpp
+ i8255.cpp
+ i8259.cpp
+ i86.cpp
+ io.cpp
+ ld700.cpp
+ ls244.cpp
+ ls393.cpp
+ m6502.cpp
+ mb8877.cpp
+ mc6800.cpp
+ mc6809.cpp
+ mc6820.cpp
+ mc6840.cpp
+ mc6847.cpp
+ mcs48.cpp
+ memory.cpp
+ msm58321.cpp
+ nand.cpp
+ nor.cpp
+ not.cpp
+ or.cpp
+ pc6031.cpp
+ pc80s31k.cpp
+ pcm1bit.cpp
+ rp5c01.cpp
+ sn76489an.cpp
+ tf20.cpp
+ tms9918a.cpp
+ tms9995.cpp
+ upd1990a.cpp
+ upd4991a.cpp
+ upd71071.cpp
+ upd7220.cpp
+ upd765a.cpp
+ upd7752.cpp
+ upd7801.cpp
+ w3100a.cpp
+ ym2151.cpp
+ ym2203.cpp
+ ym2413.cpp
+ z80.cpp
+ z80ctc.cpp
+ z80dma.cpp
+ z80pio.cpp
+ z80sio.cpp
+)
\ No newline at end of file
--- /dev/null
+cmake_minimum_required (VERSION 2.6)
+
+message("* vm/fmgen") # progress marker printed while CMake processes this directory
+
+add_library(vm_fmgen # library built from the fmgen sources listed below; type follows CMake's default
+ file.cpp
+ fmgen.cpp
+ fmtimer.cpp
+ opm.cpp
+ opna.cpp
+ psg.cpp
+)
\ No newline at end of file
--- /dev/null
+cmake_minimum_required (VERSION 2.6)
+
+# Progress marker printed while CMake processes this directory.
+# It previously read "* vm/x1", copied from the x1 directory.
+message("* vm/pc8801")
+
+# Library built from the PC-8801 sources; type follows CMake's default.
+add_library(vm_pc8801
+ pc88.cpp
+ pc8801.cpp
+)
\ No newline at end of file
--- /dev/null
+cmake_minimum_required (VERSION 2.6)
+
+message("* vm/x1") # progress marker printed while CMake processes this directory
+
+add_library(vm_x1 # library built from the X1 sources listed below; type follows CMake's default
+ wdisplay.cpp # NOTE(review): the list is otherwise alphabetical; this may be a typo for display.cpp - verify the file exists
+ emm.cpp
+ floppy.cpp
+ io.cpp
+ joystick.cpp
+ keyboard.cpp
+ memory.cpp
+ mouse.cpp
+ printer.cpp
+ psub.cpp
+ sub.cpp
+ x1.cpp
+)
\ No newline at end of file