OSDN Git Service

Blitter clear implementation
[android-x86/external-swiftshader.git] / src / Renderer / Blitter.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2013 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "Blitter.hpp"
13
14 #include "Common/Debug.hpp"
15 #include "Reactor/Reactor.hpp"
16
17 namespace sw
18 {
19         Blitter blitter;   // Blitter instance defined at sw namespace scope.
20
21         Blitter::Blitter()
22         {
23                 blitCache = new RoutineCache<BlitState>(1024);
24         }
25
26         Blitter::~Blitter()   // Destructor: releases the routine cache owned by this object.
27         {
28                 delete blitCache;   // Allocated with new in Blitter::Blitter().
29         }
30
31         void Blitter::clear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
32         {
33                 sw::Surface color(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
34                 Blitter::Options clearOptions = static_cast<sw::Blitter::Options>((rgbaMask & 0xF) | CLEAR_OPERATION);
35                 blit(&color, dRect, dest, dRect, clearOptions);
36         }
37
38         void Blitter::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
39         {
40                 Blitter::Options options = filter ? static_cast<Blitter::Options>(WRITE_RGBA | FILTER_LINEAR) : WRITE_RGBA;
41                 blit(source, sRect, dest, dRect, options);
42         }
43
44         void Blitter::blit(Surface *source, const SliceRect &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
45         {
46                 if(blitReactor(source, sourceRect, dest, destRect, options))
47                 {
48                         return;
49                 }
50
51                 SliceRect sRect = sourceRect;
52                 SliceRect dRect = destRect;
53
54                 bool flipX = destRect.x0 > destRect.x1;
55                 bool flipY = destRect.y0 > destRect.y1;
56
57                 if(flipX)
58                 {
59                         swap(dRect.x0, dRect.x1);
60                         swap(sRect.x0, sRect.x1);
61                 }
62                 if(flipY)
63                 {
64                         swap(dRect.y0, dRect.y1);
65                         swap(sRect.y0, sRect.y1);
66                 }
67
68                 source->lockInternal(sRect.x0, sRect.y0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC);
69                 dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
70
71                 float w = static_cast<float>(sRect.x1 - sRect.x0) / static_cast<float>(dRect.x1 - dRect.x0);
72                 float h = static_cast<float>(sRect.y1 - sRect.y0) / static_cast<float>(dRect.y1 - dRect.y0);
73
74                 const float xStart = (float)sRect.x0 + 0.5f * w;
75                 float y = (float)sRect.y0 + 0.5f * h;
76
77                 for(int j = dRect.y0; j < dRect.y1; j++)
78                 {
79                         float x = xStart;
80
81                         for(int i = dRect.x0; i < dRect.x1; i++)
82                         {
83                                 // FIXME: Support RGBA mask
84                                 dest->copyInternal(source, i, j, x, y, (options & FILTER_LINEAR) == FILTER_LINEAR);
85
86                                 x += w;
87                         }
88
89                         y += h;
90                 }
91
92                 source->unlockInternal();
93                 dest->unlockInternal();
94         }
95
96         void Blitter::blit3D(Surface *source, Surface *dest)
97         {
98                 source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
99                 dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
100
101                 float w = static_cast<float>(source->getWidth())  / static_cast<float>(dest->getWidth());
102                 float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight());
103                 float d = static_cast<float>(source->getDepth())  / static_cast<float>(dest->getDepth());
104
105                 float z = 0.5f * d;
106                 for(int k = 0; k < dest->getDepth(); ++k)
107                 {
108                         float y = 0.5f * h;
109                         for(int j = 0; j < dest->getHeight(); ++j)
110                         {
111                                 float x = 0.5f * w;
112                                 for(int i = 0; i < dest->getWidth(); ++i)
113                                 {
114                                         dest->copyInternal(source, i, j, k, x, y, z, true);
115                                         x += w;
116                                 }
117                                 y += h;
118                         }
119                         z += d;
120                 }
121
122                 source->unlockInternal();
123                 dest->unlockInternal();
124         }
125
126         bool Blitter::read(Float4 &c, Pointer<Byte> element, Format format)
127         {
128                 c = Float4(0.0f, 0.0f, 0.0f, 1.0f);
129
130                 switch(format)
131                 {
132                 case FORMAT_L8:
133                         c.xyz = Float(Int(*Pointer<Byte>(element)));
134                         break;
135                 case FORMAT_A8:
136                         c.xyz = 0.0f;
137                         c.w = Float(Int(*Pointer<Byte>(element)));
138                         break;
139                 case FORMAT_R8I:
140                         c.yzw = 0.0f;
141                         c.x = Float(Int(*Pointer<SByte>(element)));
142                         break;
143                 case FORMAT_R8UI:
144                         c.yzw = 0.0f;
145                         c.x = Float(Int(*Pointer<Byte>(element)));
146                         break;
147                 case FORMAT_R8I_SNORM:
148                         c.yzw = 0.0f;
149                         c.x = Float(Int(*Pointer<SByte>(element)));
150                         break;
151                 case FORMAT_R8:
152                         c.yzw = 0.0f;
153                         c.x = Float(Int(*Pointer<Byte>(element)));
154                         break;
155                 case FORMAT_R16I:
156                         c.yzw = 0.0f;
157                         c.x = Float(Int(*Pointer<Short>(element)));
158                         break;
159                 case FORMAT_R16UI:
160                         c.yzw = 0.0f;
161                         c.x = Float(Int(*Pointer<UShort>(element)));
162                         break;
163                 case FORMAT_R32I:
164                         c.yzw = 0.0f;
165                         c.x = Float(Int(*Pointer<Int>(element)));
166                         break;
167                 case FORMAT_R32UI:
168                         c.yzw = 0.0f;
169                         c.x = Float(Int(*Pointer<UInt>(element)));
170                         break;
171                 case FORMAT_A8R8G8B8:
172                         c = Float4(*Pointer<Byte4>(element)).zyxw;
173                         break;
174                 case FORMAT_A8B8G8R8I:
175                         c = Float4(*Pointer<SByte4>(element));
176                         break;
177                 case FORMAT_A8B8G8R8UI:
178                         c = Float4(*Pointer<Byte4>(element));
179                         break;
180                 case FORMAT_A8B8G8R8I_SNORM:
181                         c = Float4(*Pointer<SByte4>(element));
182                         break;
183                 case FORMAT_A8B8G8R8:
184                         c = Float4(*Pointer<Byte4>(element));
185                         break;
186                 case FORMAT_X8R8G8B8:
187                         c = Float4(*Pointer<Byte4>(element)).zyxw;
188                         c.w = float(0xFF);
189                         break;
190                 case FORMAT_X8B8G8R8I:
191                         c = Float4(*Pointer<SByte4>(element));
192                         c.w = float(0x7F);
193                         break;
194                 case FORMAT_X8B8G8R8UI:
195                         c = Float4(*Pointer<Byte4>(element));
196                         c.w = float(0xFF);
197                         break;
198                 case FORMAT_X8B8G8R8I_SNORM:
199                         c = Float4(*Pointer<SByte4>(element));
200                         c.w = float(0x7F);
201                         break;
202                 case FORMAT_X8B8G8R8:
203                         c = Float4(*Pointer<Byte4>(element));
204                         c.w = float(0xFF);
205                         break;
206                 case FORMAT_A16B16G16R16I:
207                         c = Float4(*Pointer<Short4>(element));
208                         break;
209                 case FORMAT_A16B16G16R16:
210                 case FORMAT_A16B16G16R16UI:
211                         c = Float4(*Pointer<UShort4>(element));
212                         break;
213                 case FORMAT_X16B16G16R16I:
214                         c = Float4(*Pointer<Short4>(element));
215                         c.w = float(0x7FFF);
216                         break;
217                 case FORMAT_X16B16G16R16UI:
218                         c = Float4(*Pointer<UShort4>(element));
219                         c.w = float(0xFFFF);
220                         break;
221                 case FORMAT_A32B32G32R32I:
222                         c = Float4(*Pointer<Int4>(element));
223                         break;
224                 case FORMAT_A32B32G32R32UI:
225                         c = Float4(*Pointer<UInt4>(element));
226                         break;
227                 case FORMAT_X32B32G32R32I:
228                         c = Float4(*Pointer<Int4>(element));
229                         c.w = float(0x7FFFFFFF);
230                         break;
231                 case FORMAT_X32B32G32R32UI:
232                         c = Float4(*Pointer<UInt4>(element));
233                         c.w = float(0xFFFFFFFF);
234                         break;
235                 case FORMAT_G8R8I:
236                         c.x = Float(Int(*Pointer<SByte>(element + 0)));
237                         c.y = Float(Int(*Pointer<SByte>(element + 1)));
238                         break;
239                 case FORMAT_G8R8UI:
240                         c.x = Float(Int(*Pointer<Byte>(element + 0)));
241                         c.y = Float(Int(*Pointer<Byte>(element + 1)));
242                         break;
243                 case FORMAT_G8R8I_SNORM:
244                         c.x = Float(Int(*Pointer<SByte>(element + 0)));
245                         c.y = Float(Int(*Pointer<SByte>(element + 1)));
246                         break;
247                 case FORMAT_G8R8:
248                         c.x = Float(Int(*Pointer<Byte>(element + 0)));
249                         c.y = Float(Int(*Pointer<Byte>(element + 1)));
250                         break;
251                 case FORMAT_G16R16I:
252                         c.x = Float(Int(*Pointer<Short>(element + 0)));
253                         c.y = Float(Int(*Pointer<Short>(element + 2)));
254                         break;
255                 case FORMAT_G16R16:
256                 case FORMAT_G16R16UI:
257                         c.x = Float(Int(*Pointer<UShort>(element + 0)));
258                         c.y = Float(Int(*Pointer<UShort>(element + 2)));
259                         break;
260                 case FORMAT_G32R32I:
261                         c.x = Float(Int(*Pointer<Int>(element + 0)));
262                         c.y = Float(Int(*Pointer<Int>(element + 4)));
263                         break;
264                 case FORMAT_G32R32UI:
265                         c.x = Float(Int(*Pointer<UInt>(element + 0)));
266                         c.y = Float(Int(*Pointer<UInt>(element + 4)));
267                         break;
268                 case FORMAT_A32B32G32R32F:
269                         c = *Pointer<Float4>(element);
270                         break;
271                 case FORMAT_G32R32F:
272                         c.x = *Pointer<Float>(element + 0);
273                         c.y = *Pointer<Float>(element + 4);
274                         break;
275                 case FORMAT_R32F:
276                         c.x = *Pointer<Float>(element);
277                         break;
278                 case FORMAT_R5G6B5:
279                         c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
280                         c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
281                         c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
282                         break;
283                 default:
284                         return false;
285                 }
286
287                 return true;
288         }
289
290         bool Blitter::write(Float4 &c, Pointer<Byte> element, Format format, const Blitter::Options& options)
291         {
292                 bool writeR = (options & WRITE_RED) == WRITE_RED;
293                 bool writeG = (options & WRITE_GREEN) == WRITE_GREEN;
294                 bool writeB = (options & WRITE_BLUE) == WRITE_BLUE;
295                 bool writeA = (options & WRITE_ALPHA) == WRITE_ALPHA;
296                 bool writeRGBA = writeR && writeG && writeB && writeA;
297
298                 switch(format)
299                 {
300                 case FORMAT_L8:
301                         *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
302                         break;
303                 case FORMAT_A8:
304                         if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); }
305                         break;
306                 case FORMAT_A8R8G8B8:
307                         if(writeRGBA)
308                         {
309                                 UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
310                                 Byte8 c1 = Pack(c0, c0);
311                                 *Pointer<UInt>(element) = UInt(As<Long>(c1));
312                         }
313                         else
314                         {
315                                 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
316                                 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
317                                 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
318                                 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
319                         }
320                         break;
321                 case FORMAT_A8B8G8R8:
322                         if(writeRGBA)
323                         {
324                                 UShort4 c0 = As<UShort4>(RoundShort4(c));
325                                 Byte8 c1 = Pack(c0, c0);
326                                 *Pointer<UInt>(element) = UInt(As<Long>(c1));
327                         }
328                         else
329                         {
330                                 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
331                                 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
332                                 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
333                                 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
334                         }
335                         break;
336                 case FORMAT_X8R8G8B8:
337                         if(writeRGBA)
338                         {
339                                 UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
340                                 Byte8 c1 = Pack(c0, c0);
341                                 *Pointer<UInt>(element) = UInt(As<Long>(c1)) | 0xFF000000;
342                         }
343                         else
344                         {
345                                 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
346                                 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
347                                 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
348                                 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
349                         }
350                         break;
351                 case FORMAT_X8B8G8R8:
352                         if(writeRGBA)
353                         {
354                                 UShort4 c0 = As<UShort4>(RoundShort4(c));
355                                 Byte8 c1 = Pack(c0, c0);
356                                 *Pointer<UInt>(element) = UInt(As<Long>(c1)) | 0xFF000000;
357                         }
358                         else
359                         {
360                                 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
361                                 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
362                                 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
363                                 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
364                         }
365                         break;
366                 case FORMAT_A32B32G32R32F:
367                         if(writeRGBA)
368                         {
369                                 *Pointer<Float4>(element) = c;
370                         }
371                         else
372                         {
373                                 if(writeR) { *Pointer<Float>(element) = c.x; }
374                                 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
375                                 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
376                                 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
377                         }
378                         break;
379                 case FORMAT_G32R32F:
380                         if(writeR && writeG)
381                         {
382                                 *Pointer<Float2>(element) = Float2(c);
383                         }
384                         else
385                         {
386                                 if(writeR) { *Pointer<Float>(element) = c.x; }
387                                 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
388                         }
389                         break;
390                 case FORMAT_R32F:
391                         if(writeR) { *Pointer<Float>(element) = c.x; }
392                         break;
393                 case FORMAT_A8B8G8R8I:
394                 case FORMAT_A8B8G8R8I_SNORM:
395                         if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
396                 case FORMAT_X8B8G8R8I:
397                 case FORMAT_X8B8G8R8I_SNORM:
398                         if(writeA && (format == FORMAT_X8B8G8R8I || format == FORMAT_X8B8G8R8I_SNORM))
399                         {
400                                 *Pointer<SByte>(element + 3) = SByte(0x7F);
401                         }
402                         if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
403                 case FORMAT_G8R8I:
404                 case FORMAT_G8R8I_SNORM:
405                         if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
406                 case FORMAT_R8I:
407                 case FORMAT_R8I_SNORM:
408                         if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
409                         break;
410                 case FORMAT_A8B8G8R8UI:
411                         if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
412                 case FORMAT_X8B8G8R8UI:
413                         if(writeA && (format == FORMAT_X8B8G8R8UI))
414                         {
415                                 *Pointer<Byte>(element + 3) = Byte(0xFF);
416                         }
417                         if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
418                 case FORMAT_G8R8UI:
419                 case FORMAT_G8R8:
420                         if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
421                 case FORMAT_R8UI:
422                 case FORMAT_R8:
423                         if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
424                         break;
425                 case FORMAT_A16B16G16R16I:
426                         if(writeRGBA)
427                         {
428                                 *Pointer<Short4>(element) = Short4(RoundInt(c));
429                         }
430                         else
431                         {
432                                 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
433                                 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
434                                 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
435                                 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
436                         }
437                         break;
438                 case FORMAT_X16B16G16R16I:
439                         if(writeRGBA)
440                         {
441                                 *Pointer<Short4>(element) = Short4(RoundInt(c));
442                         }
443                         else
444                         {
445                                 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
446                                 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
447                                 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
448                         }
449                         if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); }
450                         break;
451                 case FORMAT_G16R16I:
452                         if(writeR && writeG)
453                         {
454                                 *Pointer<UInt>(element) = UInt(As<Long>(Short4(RoundInt(c))));
455                         }
456                         else
457                         {
458                                 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
459                                 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
460                         }
461                         break;
462                 case FORMAT_R16I:
463                         if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
464                         break;
465                 case FORMAT_A16B16G16R16UI:
466                 case FORMAT_A16B16G16R16:
467                         if(writeRGBA)
468                         {
469                                 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
470                         }
471                         else
472                         {
473                                 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
474                                 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
475                                 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
476                                 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
477                         }
478                         break;
479                 case FORMAT_X16B16G16R16UI:
480                         if(writeRGBA)
481                         {
482                                 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
483                         }
484                         else
485                         {
486                                 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
487                                 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
488                                 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
489                         }
490                         if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); }
491                         break;
492                 case FORMAT_G16R16UI:
493                 case FORMAT_G16R16:
494                         if(writeR && writeG)
495                         {
496                                 *Pointer<UInt>(element) = UInt(As<Long>(UShort4(RoundInt(c))));
497                         }
498                         else
499                         {
500                                 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
501                                 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
502                         }
503                         break;
504                 case FORMAT_R16UI:
505                         if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
506                         break;
507                 case FORMAT_A32B32G32R32I:
508                         if(writeRGBA)
509                         {
510                                 *Pointer<Int4>(element) = RoundInt(c);
511                         }
512                         else
513                         {
514                                 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
515                                 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
516                                 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
517                                 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
518                         }
519                         break;
520                 case FORMAT_X32B32G32R32I:
521                         if(writeRGBA)
522                         {
523                                 *Pointer<Int4>(element) = RoundInt(c);
524                         }
525                         else
526                         {
527                                 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
528                                 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
529                                 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
530                         }
531                         if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
532                         break;
533                 case FORMAT_G32R32I:
534                         if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
535                 case FORMAT_R32I:
536                         if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
537                         break;
538                 case FORMAT_A32B32G32R32UI:
539                         if(writeRGBA)
540                         {
541                                 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
542                         }
543                         else
544                         {
545                                 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
546                                 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
547                                 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
548                                 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
549                         }
550                         break;
551                 case FORMAT_X32B32G32R32UI:
552                         if(writeRGBA)
553                         {
554                                 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
555                         }
556                         else
557                         {
558                                 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
559                                 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
560                                 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
561                         }
562                         if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); }
563                         break;
564                 case FORMAT_G32R32UI:
565                         if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
566                 case FORMAT_R32UI:
567                         if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
568                         break;
569                 case FORMAT_R5G6B5:
570                         if(writeR && writeG && writeB)
571                         {
572                                 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
573                                                                   (RoundInt(Float(c.y)) << Int(5)) |
574                                                                   (RoundInt(Float(c.x)) << Int(11)));
575                         }
576                         else
577                         {
578                                 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
579                                 unsigned short unmask = ~mask;
580                                 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | 
581                                                             (UShort(RoundInt(Float(c.z)) |
582                                                                    (RoundInt(Float(c.y)) << Int(5)) |
583                                                                    (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
584                         }
585                         break;
586                 default:
587                         return false;
588                 }
589                 return true;
590         }
591
592         bool Blitter::read(Int4 &c, Pointer<Byte> element, Format format)
593         {
594                 c = Int4(0, 0, 0, 0xFFFFFFFF);
595
596                 switch(format)
597                 {
598                 case FORMAT_A8B8G8R8I:
599                         Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
600                 case FORMAT_X8B8G8R8I:
601                         Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
602                 case FORMAT_G8R8I:
603                         Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
604                 case FORMAT_R8I:
605                         Insert(c, Int(*Pointer<SByte>(element)), 0);
606                         if(format != FORMAT_A8B8G8R8I)
607                         {
608                                 Insert(c, Int(0x7F), 3); // Set alpha
609                         }
610                         break;
611                 case FORMAT_A8B8G8R8UI:
612                         Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
613                 case FORMAT_X8B8G8R8UI:
614                         Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
615                 case FORMAT_G8R8UI:
616                         Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
617                 case FORMAT_R8UI:
618                         Insert(c, Int(*Pointer<Byte>(element)), 0);
619                         if(format != FORMAT_A8B8G8R8UI)
620                         {
621                                 Insert(c, Int(0xFF), 3); // Set alpha
622                         }
623                         break;
624                 case FORMAT_A16B16G16R16I:
625                         Insert(c, Int(*Pointer<Short>(element + 3)), 3);
626                 case FORMAT_X16B16G16R16I:
627                         Insert(c, Int(*Pointer<Short>(element + 2)), 2);
628                 case FORMAT_G16R16I:
629                         Insert(c, Int(*Pointer<Short>(element + 1)), 1);
630                 case FORMAT_R16I:
631                         Insert(c, Int(*Pointer<Short>(element)), 0);
632                         if(format != FORMAT_A16B16G16R16I)
633                         {
634                                 Insert(c, Int(0x7FFF), 3); // Set alpha
635                         }
636                         break;
637                 case FORMAT_A16B16G16R16UI:
638                         Insert(c, Int(*Pointer<UShort>(element + 3)), 3);
639                 case FORMAT_X16B16G16R16UI:
640                         Insert(c, Int(*Pointer<UShort>(element + 2)), 2);
641                 case FORMAT_G16R16UI:
642                         Insert(c, Int(*Pointer<UShort>(element + 1)), 1);
643                 case FORMAT_R16UI:
644                         Insert(c, Int(*Pointer<UShort>(element)), 0);
645                         if(format != FORMAT_A16B16G16R16UI)
646                         {
647                                 Insert(c, Int(0xFFFF), 3); // Set alpha
648                         }
649                         break;
650                 case FORMAT_A32B32G32R32I:
651                         Insert(c, Int(*Pointer<Int>(element + 3)), 3);
652                 case FORMAT_X32B32G32R32I:
653                         Insert(c, Int(*Pointer<Int>(element + 2)), 2);
654                 case FORMAT_G32R32I:
655                         Insert(c, Int(*Pointer<Int>(element + 1)), 1);
656                 case FORMAT_R32I:
657                         Insert(c, Int(*Pointer<Int>(element)), 0);
658                         if(format != FORMAT_A32B32G32R32I)
659                         {
660                                 Insert(c, Int(0x7FFFFFFF), 3); // Set alpha
661                         }
662                         break;
663                 case FORMAT_A32B32G32R32UI:
664                         Insert(c, Int(*Pointer<UInt>(element + 3)), 3);
665                 case FORMAT_X32B32G32R32UI:
666                         Insert(c, Int(*Pointer<UInt>(element + 2)), 2);
667                 case FORMAT_G32R32UI:
668                         Insert(c, Int(*Pointer<UInt>(element + 1)), 1);
669                 case FORMAT_R32UI:
670                         Insert(c, Int(*Pointer<UInt>(element)), 0);
671                         if(format != FORMAT_A32B32G32R32UI)
672                         {
673                                 Insert(c, Int(UInt(0xFFFFFFFFU)), 3); // Set alpha
674                         }
675                         break;
676                 default:
677                         return false;
678                 }
679
680                 return true;
681         }
682
683         bool Blitter::write(Int4 &c, Pointer<Byte> element, Format format, const Blitter::Options& options)
684         {
685                 bool writeR = (options & WRITE_RED) == WRITE_RED;
686                 bool writeG = (options & WRITE_GREEN) == WRITE_GREEN;
687                 bool writeB = (options & WRITE_BLUE) == WRITE_BLUE;
688                 bool writeA = (options & WRITE_ALPHA) == WRITE_ALPHA;
689                 bool writeRGBA = writeR && writeG && writeB && writeA;
690
691                 switch(format)
692                 {
693                 case FORMAT_A8B8G8R8I:
694                         if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
695                 case FORMAT_X8B8G8R8I:
696                         if(writeA && (format != FORMAT_A8B8G8R8I))
697                         {
698                                 *Pointer<SByte>(element + 3) = SByte(0x7F);
699                         }
700                         if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
701                 case FORMAT_G8R8I:
702                         if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
703                 case FORMAT_R8I:
704                         if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
705                         break;
706                 case FORMAT_A8B8G8R8UI:
707                         if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
708                 case FORMAT_X8B8G8R8UI:
709                         if(writeA && (format != FORMAT_A8B8G8R8UI))
710                         {
711                                 *Pointer<Byte>(element + 3) = Byte(0xFF);
712                         }
713                         if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
714                 case FORMAT_G8R8UI:
715                         if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
716                 case FORMAT_R8UI:
717                         if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
718                         break;
719                 case FORMAT_A16B16G16R16I:
720                         if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
721                 case FORMAT_X16B16G16R16I:
722                         if(writeA && (format != FORMAT_A16B16G16R16I))
723                         {
724                                 *Pointer<Short>(element + 6) = Short(0x7FFF);
725                         }
726                         if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
727                 case FORMAT_G16R16I:
728                         if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
729                 case FORMAT_R16I:
730                         if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
731                         break;
732                 case FORMAT_A16B16G16R16UI:
733                         if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
734                 case FORMAT_X16B16G16R16UI:
735                         if(writeA && (format != FORMAT_A16B16G16R16UI))
736                         {
737                                 *Pointer<UShort>(element + 6) = UShort(0xFFFF);
738                         }
739                         if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
740                 case FORMAT_G16R16UI:
741                         if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
742                 case FORMAT_R16UI:
743                         if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
744                         break;
745                 case FORMAT_A32B32G32R32I:
746                         if(writeRGBA)
747                         {
748                                 *Pointer<Int4>(element) = c;
749                         }
750                         else
751                         {
752                                 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
753                                 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
754                                 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
755                                 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
756                         }
757                         break;
758                 case FORMAT_X32B32G32R32I:
759                         if(writeRGBA)
760                         {
761                                 *Pointer<Int4>(element) = c;
762                         }
763                         else
764                         {
765                                 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
766                                 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
767                                 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
768                         }
769                         if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
770                         break;
771                 case FORMAT_G32R32I:
772                         if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
773                         if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
774                         break;
775                 case FORMAT_R32I:
776                         if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
777                         break;
778                 case FORMAT_A32B32G32R32UI:
779                         if(writeRGBA)
780                         {
781                                 *Pointer<UInt4>(element) = As<UInt4>(c);
782                         }
783                         else
784                         {
785                                 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
786                                 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
787                                 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
788                                 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
789                         }
790                         break;
791                 case FORMAT_X32B32G32R32UI:
792                         if(writeRGBA)
793                         {
794                                 *Pointer<UInt4>(element) = As<UInt4>(c);
795                         }
796                         else
797                         {
798                                 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
799                                 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
800                                 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
801                         }
802                         if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); }
803                         break;
804                 case FORMAT_G32R32UI:
805                         if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
806                         if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
807                         break;
808                 case FORMAT_R32UI:
809                         if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
810                         break;
811                 default:
812                         return false;
813                 }
814
815                 return true;
816         }
817
818         bool Blitter::GetScale(float4& scale, Format format)
819         {
820                 switch(format)
821                 {
822                 case FORMAT_L8:
823                 case FORMAT_A8:
824                 case FORMAT_A8R8G8B8:
825                 case FORMAT_X8R8G8B8:
826                 case FORMAT_R8:
827                 case FORMAT_G8R8:
828                 case FORMAT_X8B8G8R8:
829                 case FORMAT_A8B8G8R8:
830                         scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
831                         break;
832                 case FORMAT_R8I_SNORM:
833                 case FORMAT_G8R8I_SNORM:
834                 case FORMAT_X8B8G8R8I_SNORM:
835                 case FORMAT_A8B8G8R8I_SNORM:
836                         scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
837                         break;
838                 case FORMAT_R8I:
839                 case FORMAT_R8UI:
840                 case FORMAT_G8R8I:
841                 case FORMAT_G8R8UI:
842                 case FORMAT_X8B8G8R8I:
843                 case FORMAT_X8B8G8R8UI:
844                 case FORMAT_A8B8G8R8I:
845                 case FORMAT_A8B8G8R8UI:
846                 case FORMAT_R16I:
847                 case FORMAT_R16UI:
848                 case FORMAT_G16R16:
849                 case FORMAT_G16R16I:
850                 case FORMAT_G16R16UI:
851                 case FORMAT_X16B16G16R16I:
852                 case FORMAT_X16B16G16R16UI:
853                 case FORMAT_A16B16G16R16:
854                 case FORMAT_A16B16G16R16I:
855                 case FORMAT_A16B16G16R16UI:
856                 case FORMAT_R32I:
857                 case FORMAT_R32UI:
858                 case FORMAT_G32R32I:
859                 case FORMAT_G32R32UI:
860                 case FORMAT_X32B32G32R32I:
861                 case FORMAT_X32B32G32R32UI:
862                 case FORMAT_A32B32G32R32I:
863                 case FORMAT_A32B32G32R32UI:
864                 case FORMAT_A32B32G32R32F:
865                 case FORMAT_G32R32F:
866                 case FORMAT_R32F:
867                         scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
868                         break;
869                 case FORMAT_R5G6B5:
870                         scale = vector(0x1F, 0x3F, 0x1F, 1.0f);
871                         break;
872                 default:
873                         return false;
874                 }
875
876                 return true;
877         }
878
879         bool Blitter::ApplyScaleAndClamp(Float4& value, const BlitState& state)
880         {
881                 float4 scale, unscale;
882                 if(Surface::isNonNormalizedInteger(state.sourceFormat) &&
883                    !Surface::isNonNormalizedInteger(state.destFormat) &&
884                    (state.options & CLEAR_OPERATION))
885                 {
886                         // If we're clearing a buffer from an int or uint color into a normalized color,
887                         // then the whole range of the int or uint color must be scaled between 0 and 1.
888                         switch(state.sourceFormat)
889                         {
890                         case FORMAT_A32B32G32R32I:
891                                 unscale = vector(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF);
892                                 break;
893                         case FORMAT_A32B32G32R32UI:
894                                 unscale = vector(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
895                                 break;
896                         default:
897                                 return false;
898                         }
899                 }
900                 else if(!GetScale(unscale, state.sourceFormat))
901                 {
902                         return false;
903                 }
904
905                 if(!GetScale(scale, state.destFormat))
906                 {
907                         return false;
908                 }
909
910                 if(unscale != scale)
911                 {
912                         value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
913                 }
914
915                 if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
916                 {
917                         value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
918
919                         value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x,
920                                                   Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y,
921                                                   Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z,
922                                                   Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w));
923                 }
924
925                 return true;
926         }
927
928         Routine *Blitter::generate(BlitState &state)
929         {
930                 Function<Void, Pointer<Byte> > function;
931                 {
932                         Pointer<Byte> blit(function.arg(0));
933
934                         Pointer<Byte> source = *Pointer<Pointer<Byte> >(blit + OFFSET(BlitData,source));
935                         Pointer<Byte> dest = *Pointer<Pointer<Byte> >(blit + OFFSET(BlitData,dest));
936                         Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
937                         Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
938
939                         Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
940                         Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
941                         Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
942                         Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));
943
944                         Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
945                         Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
946                         Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
947                         Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));
948
949                         Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
950                         Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));
951
952                         bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
953                         bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
954                         bool intBoth = intSrc && intDst;
955
956                         bool hasConstantColorI = false;
957                         Int4 constantColorI;
958                         bool hasConstantColorF = false;
959                         Float4 constantColorF;
960                         if(state.options & CLEAR_OPERATION)
961                         {
962                                 if(intBoth) // Integer types
963                                 {
964                                         if(!read(constantColorI, source, state.sourceFormat))
965                                         {
966                                                 return nullptr;
967                                         }
968                                         hasConstantColorI = true;
969                                 }
970                                 else
971                                 {
972                                         if(!read(constantColorF, source, state.sourceFormat))
973                                         {
974                                                 return nullptr;
975                                         }
976                                         hasConstantColorF = true;
977
978                                         if(!ApplyScaleAndClamp(constantColorF, state))
979                                         {
980                                                 return nullptr;
981                                         }
982                                 }
983                         }
984
985                         Float y = y0;
986
987                         For(Int j = y0d, j < y1d, j++)
988                         {
989                                 Float x = x0;
990                                 Pointer<Byte> destLine = dest + j * dPitchB;
991
992                                 For(Int i = x0d, i < x1d, i++)
993                                 {
994                                         Pointer<Byte> d = destLine + i * Surface::bytes(state.destFormat);
995                                         if(hasConstantColorI)
996                                         {
997                                                 if(!write(constantColorI, d, state.destFormat, state.options))
998                                                 {
999                                                         return nullptr;
1000                                                 }
1001                                         }
1002                                         else if(hasConstantColorF)
1003                                         {
1004                                                 if(!write(constantColorF, d, state.destFormat, state.options))
1005                                                 {
1006                                                         return nullptr;
1007                                                 }
1008                                         }
1009                                         else if(intBoth) // Integer types do not support filtering
1010                                         {
1011                                                 Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
1012                                                 Pointer<Byte> s = source + Int(y) * sPitchB + Int(x) * Surface::bytes(state.sourceFormat);
1013                                                 if(!read(color, s, state.sourceFormat))
1014                                                 {
1015                                                         return nullptr;
1016                                                 }
1017
1018                                                 if(!write(color, d, state.destFormat, state.options))
1019                                                 {
1020                                                         return nullptr;
1021                                                 }
1022                                         }
1023                                         else
1024                                         {
1025                                                 Float4 color;
1026
1027                                                 if(!(state.options & FILTER_LINEAR) || intSrc)
1028                                                 {
1029                                                         Int X = Int(x);
1030                                                         Int Y = Int(y);
1031
1032                                                         Pointer<Byte> s = source + Y * sPitchB + X * Surface::bytes(state.sourceFormat);
1033
1034                                                         if(!read(color, s, state.sourceFormat))
1035                                                         {
1036                                                                 return nullptr;
1037                                                         }
1038                                                 }
1039                                                 else   // Bilinear filtering
1040                                                 {
1041                                                         Float x0 = x - 0.5f;
1042                                                         Float y0 = y - 0.5f;
1043
1044                                                         Int X0 = Max(Int(x0), 0);
1045                                                         Int Y0 = Max(Int(y0), 0);
1046
1047                                                         Int X1 = IfThenElse(X0 + 1 >= sWidth, X0, X0 + 1);
1048                                                         Int Y1 = IfThenElse(Y0 + 1 >= sHeight, Y0, Y0 + 1);
1049
1050                                                         Pointer<Byte> s00 = source + Y0 * sPitchB + X0 * Surface::bytes(state.sourceFormat);
1051                                                         Pointer<Byte> s01 = source + Y0 * sPitchB + X1 * Surface::bytes(state.sourceFormat);
1052                                                         Pointer<Byte> s10 = source + Y1 * sPitchB + X0 * Surface::bytes(state.sourceFormat);
1053                                                         Pointer<Byte> s11 = source + Y1 * sPitchB + X1 * Surface::bytes(state.sourceFormat);
1054
1055                                                         Float4 c00; if(!read(c00, s00, state.sourceFormat)) return nullptr;
1056                                                         Float4 c01; if(!read(c01, s01, state.sourceFormat)) return nullptr;
1057                                                         Float4 c10; if(!read(c10, s10, state.sourceFormat)) return nullptr;
1058                                                         Float4 c11; if(!read(c11, s11, state.sourceFormat)) return nullptr;
1059
1060                                                         Float4 fx = Float4(x0 - Float(X0));
1061                                                         Float4 fy = Float4(y0 - Float(Y0));
1062
1063                                                         color = c00 * (Float4(1.0f) - fx) * (Float4(1.0f) - fy) +
1064                                                                 c01 * fx * (Float4(1.0f) - fy) +
1065                                                                 c10 * (Float4(1.0f) - fx) * fy +
1066                                                                 c11 * fx * fy;
1067                                                 }
1068
1069                                                 if(!ApplyScaleAndClamp(color, state) || !write(color, d, state.destFormat, state.options))
1070                                                 {
1071                                                         return nullptr;
1072                                                 }
1073                                         }
1074
1075                                         if(!hasConstantColorI && !hasConstantColorF) { x += w; }
1076                                 }
1077
1078                                 if(!hasConstantColorI && !hasConstantColorF) { y += h; }
1079                         }
1080                 }
1081
1082                 return function(L"BlitRoutine");
1083         }
1084
1085         bool Blitter::blitReactor(Surface *source, const SliceRect &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
1086         {
1087                 ASSERT(!(options & CLEAR_OPERATION) || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1)));
1088
1089                 Rect dRect = destRect;
1090                 Rect sRect = sourceRect;
1091                 if(destRect.x0 > destRect.x1)
1092                 {
1093                         swap(dRect.x0, dRect.x1);
1094                         swap(sRect.x0, sRect.x1);
1095                 }
1096                 if(destRect.y0 > destRect.y1)
1097                 {
1098                         swap(dRect.y0, dRect.y1);
1099                         swap(sRect.y0, sRect.y1);
1100                 }
1101
1102                 BlitState state;
1103
1104                 bool useSourceInternal = !source->isExternalDirty();
1105                 bool useDestInternal = !dest->isExternalDirty();
1106
1107                 state.sourceFormat = source->getFormat(useSourceInternal);
1108                 state.destFormat = dest->getFormat(useDestInternal);
1109                 state.options = options;
1110
1111                 criticalSection.lock();
1112                 Routine *blitRoutine = blitCache->query(state);
1113                 
1114                 if(!blitRoutine)
1115                 {
1116                         blitRoutine = generate(state);
1117
1118                         if(!blitRoutine)
1119                         {
1120                                 criticalSection.unlock();
1121                                 return false;
1122                         }
1123
1124                         blitCache->add(state, blitRoutine);
1125                 }
1126
1127                 criticalSection.unlock();
1128
1129                 void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1130
1131                 BlitData data;
1132
1133                 bool isRGBA = ((options & WRITE_RGBA) == WRITE_RGBA);
1134                 bool isEntireDest = dest->isEntire(destRect);
1135
1136                 data.source = source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal);
1137                 data.dest = dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal);
1138                 data.sPitchB = source->getPitchB(useSourceInternal);
1139                 data.dPitchB = dest->getPitchB(useDestInternal);
1140
1141                 data.w = 1.0f / (dRect.x1 - dRect.x0) * (sRect.x1 - sRect.x0);
1142                 data.h = 1.0f / (dRect.y1 - dRect.y0) * (sRect.y1 - sRect.y0);
1143                 data.x0 = (float)sRect.x0 + 0.5f * data.w;
1144                 data.y0 = (float)sRect.y0 + 0.5f * data.h;
1145                 
1146                 data.x0d = dRect.x0;
1147                 data.x1d = dRect.x1;
1148                 data.y0d = dRect.y0;
1149                 data.y1d = dRect.y1;
1150
1151                 data.sWidth = source->getWidth();
1152                 data.sHeight = source->getHeight();
1153
1154                 blitFunction(&data);
1155
1156                 source->unlock(useSourceInternal);
1157                 dest->unlock(useDestInternal);
1158
1159                 return true;
1160         }
1161 }