OSDN Git Service

[WIP][Agar] Build fit to CSP.
[csp-qt/common_source_project-fm7.git] / source / src / agar / common / scaler / sse2 / scaler_x6_sse2.c
1 /*
2  * Zoom x6 (horizontal), SSE2 version
3  * (C) 2013 K.Ohta
4  * 
5  * History:
6  *  2013-04-02 Move from scaler_x2.c
7  */
8 #include <agar/core.h>
9 #include <agar/gui.h>
10 #include "simd_types.h"
11 #include "sdl_cpuid.h"
12 #include "cache_wrapper.h"
13
14 extern struct XM7_CPUID *pCpuID;
15 extern BOOL bFullScan;
16
17 extern void pVram2RGB_x6_Line(Uint32 *src, Uint32 *dst, int x, int y, int yrep);
18
19 #if defined(__SSE2__)
20 static void Scaler_DrawLine(v4hi *dst, Uint32 *src, int ww, int repeat, int pitch)
21 {
22    int xx;
23    int yy;
24    int yrep2;
25    int yrep3;
26    int blank;
27    v4hi *b2p;
28    v4hi r1, r2;
29    v4hi *d0;
30    v4hi *b;
31    int pitch2;
32 #if AG_BIG_ENDIAN != 1
33    const v4ui bb = {0xff000000, 0xff000000, 0xff000000, 0xff000000};
34 #else
35    const v4ui bb = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff};
36 #endif
37      
38    if(repeat <= 0) return;
39    b = (v4hi *)src;
40    b2p = dst;
41    pitch2 = pitch / sizeof(v4hi);
42    if((bFullScan) || (repeat < 2)) {
43       v4hi r3, r4, r5, r6, r7;
44       v4hi r8, r9, r10, r11, r12;
45       v4hi r13, r14;
46       for(xx = 0; xx < ww; xx += 8) {
47          b2p = dst;
48          r1 = *b++;
49          r2 = *b++;
50          r3.uv   = (v4ui){r1.i[0], r1.i[0], r1.i[0], r1.i[0]};  
51          r4.uv   = (v4ui){r1.i[0], r1.i[0], r1.i[1], r1.i[1]};  
52          r5.uv   = (v4ui){r1.i[1], r1.i[1], r1.i[1], r1.i[1]};  
53          r6.uv   = (v4ui){r1.i[2], r1.i[2], r1.i[2], r1.i[2]};  
54          r7.uv   = (v4ui){r1.i[2], r1.i[2], r1.i[3], r1.i[3]};  
55          r8.uv   = (v4ui){r1.i[3], r1.i[3], r1.i[3], r1.i[3]};
56          
57          r9.uv   = (v4ui){r2.i[0], r2.i[0], r2.i[0], r2.i[0]};  
58          r10.uv  = (v4ui){r2.i[0], r2.i[0], r2.i[1], r2.i[1]};  
59          r11.uv  = (v4ui){r2.i[1], r2.i[1], r2.i[1], r2.i[1]};  
60          r12.uv  = (v4ui){r2.i[2], r2.i[2], r2.i[2], r2.i[2]};  
61          r13.uv  = (v4ui){r2.i[2], r2.i[2], r2.i[3], r2.i[3]};  
62          r14.uv  = (v4ui){r2.i[3], r2.i[3], r2.i[3], r2.i[3]};
63          for(yy = 0; yy < repeat; yy++) {
64             b2p[0] = r3;
65             b2p[1] = r4;
66             b2p[2] = r5;
67             b2p[3] = r6;
68             b2p[4] = r7;
69             b2p[5] = r8;
70             b2p[6] = r9;
71             b2p[7] = r10;
72             b2p[8] = r11;
73             b2p[9] = r12;
74             b2p[10] = r13;
75             b2p[11] = r14;
76             b2p = b2p + pitch2;
77          }
78          dst += 10;
79 //       b += 2;
80       }
81    } else {
82       v4hi r3, r4, r5, r6, r7;
83       v4hi r8, r9, r10, r11, r12;
84       v4hi r13, r14;
85       for(xx = 0; xx < ww; xx += 8) {
86          b2p = dst;
87          r1 = *b++;
88          r2 = *b++;
89
90          r3.uv   = (v4ui){r1.i[0], r1.i[0], r1.i[0], r1.i[0]};  
91          r4.uv   = (v4ui){r1.i[0], r1.i[0], r1.i[1], r1.i[1]};  
92          r5.uv   = (v4ui){r1.i[1], r1.i[1], r1.i[1], r1.i[1]};  
93          r6.uv   = (v4ui){r1.i[2], r1.i[2], r1.i[2], r1.i[2]};  
94          r7.uv   = (v4ui){r1.i[2], r1.i[2], r1.i[3], r1.i[3]};  
95          r8.uv   = (v4ui){r1.i[3], r1.i[3], r1.i[3], r1.i[3]};
96
97          r9.uv   = (v4ui){r2.i[0], r2.i[0], r2.i[0], r2.i[0]};  
98          r10.uv  = (v4ui){r2.i[0], r2.i[0], r2.i[1], r2.i[1]};  
99          r11.uv  = (v4ui){r2.i[1], r2.i[1], r2.i[1], r2.i[1]};  
100          r12.uv  = (v4ui){r2.i[2], r2.i[2], r2.i[2], r2.i[2]};  
101          r13.uv  = (v4ui){r2.i[2], r2.i[2], r2.i[3], r2.i[3]};  
102          r14.uv  = (v4ui){r2.i[3], r2.i[3], r2.i[3], r2.i[3]};
103          for(yy = 0; yy < repeat - 1; yy++) {
104             b2p[0] = r3;
105             b2p[1] = r4;
106             b2p[2] = r5;
107             b2p[3] = r6;
108             b2p[4] = r7;
109             b2p[5] = r8;
110             b2p[6] = r9;
111             b2p[7] = r10;
112             b2p[8] = r11;
113             b2p[9] = r12;
114             b2p[10] = r13;
115             b2p[11] = r14;
116             b2p = b2p + pitch2;
117          }
118          b2p[0].uv = 
119          b2p[1].uv = 
120          b2p[2].uv = 
121          b2p[3].uv = 
122          b2p[4].uv = 
123          b2p[5].uv = 
124          b2p[6].uv = 
125          b2p[7].uv = 
126          b2p[8].uv = 
127          b2p[9].uv = 
128          b2p[10].uv =
129          b2p[11].uv = bb;
130          dst += 12;
131 //       b += 2;
132       }
133    }
134    
135 }
136
137
138
139 void pVram2RGB_x6_Line_SSE2(Uint32 *src, Uint8 *dst, int xbegin, int xend, int y, int yrep)
140 {
141    register v4hi *b;
142    AG_Surface *Surface = GetDrawSurface();
143    Uint32 *d1;
144    Uint32 *d2;
145    Uint32 *p;
146    int w;
147    int h;
148    int yy;
149    int xx;
150    int hh;
151    int ww;
152    int i;
153    int x = xbegin;
154    int yrep2;
155    unsigned  pitch;
156    Uint32 black;
157    if(Surface == NULL) return;
158    w = Surface->w;
159    h = Surface->h;
160
161
162    ww = xend - xbegin;
163 //   if(ww > (w / 2)) ww = w / 2;
164    ww = (ww / 8) * 8;
165    if(ww <= 0) return;
166
167
168 #if AG_BIG_ENDIAN != 1
169    black = 0xff000000;
170 #else
171    black = 0x000000ff;
172 #endif
173 //   yrep = yrep * 16.0f;
174
175    yrep2 = yrep;
176
177    d1 = (Uint32 *)((Uint8 *)dst + x * 6 * Surface->format->BytesPerPixel);
178    d2 = &src[x + y * 640];
179    Scaler_DrawLine((v4hi *)d1, (Uint32 *)d2, ww, yrep2, Surface->pitch);
180 //   AG_SurfaceUnlock(Surface);
181    return;
182 }
183
184
185 #else 
186
187 void pVram2RGB_x6_Line_SSE2(Uint32 *src, int xbegin,  int xend, int y, int yrep)
188 {
189    pVram2RGB_x6_Line(src, dst, x, y, yrep);
190 }
191
192 #endif // __SSE2__