OSDN Git Service

Make Blitter part of Renderer.
[android-x86/external-swiftshader.git] / src / Renderer / Blitter.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Blitter.hpp"
16
17 #include "Reactor/Reactor.hpp"
18 #include "Common/Memory.hpp"
19 #include "Common/Debug.hpp"
20
21 namespace sw
22 {
23         Blitter::Blitter()
24         {
25                 blitCache = new RoutineCache<BlitState>(1024);
26         }
27
28         Blitter::~Blitter()
29         {
30                 delete blitCache;
31         }
32
33         void Blitter::clear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
34         {
35                 if(fastClear(pixel, format, dest, dRect, rgbaMask))
36                 {
37                         return;
38                 }
39
40                 sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
41                 Blitter::Options clearOptions = static_cast<sw::Blitter::Options>((rgbaMask & 0xF) | CLEAR_OPERATION);
42                 SliceRect sRect(dRect);
43                 sRect.slice = 0;
44                 blit(color, sRect, dest, dRect, clearOptions);
45                 delete color;
46         }
47
48         bool Blitter::fastClear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
49         {
50                 if(format != FORMAT_A32B32G32R32F)
51                 {
52                         return false;
53                 }
54
55                 float *color = (float*)pixel;
56                 float r = color[0];
57                 float g = color[1];
58                 float b = color[2];
59                 float a = color[3];
60
61                 uint32_t packed;
62
63                 switch(dest->getFormat())
64                 {
65                 case FORMAT_R5G6B5:
66                         if((rgbaMask & 0x7) != 0x7) return false;
67                         packed = ((uint16_t)(31 * b + 0.5f) << 0) |
68                                  ((uint16_t)(63 * g + 0.5f) << 5) |
69                                  ((uint16_t)(31 * r + 0.5f) << 11);
70                         break;
71                 case FORMAT_X8B8G8R8:
72                         if((rgbaMask & 0x7) != 0x7) return false;
73                         packed = ((uint32_t)(255) << 24) |
74                                  ((uint32_t)(255 * b + 0.5f) << 16) |
75                                  ((uint32_t)(255 * g + 0.5f) << 8) |
76                                  ((uint32_t)(255 * r + 0.5f) << 0);
77                         break;
78                 case FORMAT_A8B8G8R8:
79                         if((rgbaMask & 0xF) != 0xF) return false;
80                         packed = ((uint32_t)(255 * a + 0.5f) << 24) |
81                                  ((uint32_t)(255 * b + 0.5f) << 16) |
82                                  ((uint32_t)(255 * g + 0.5f) << 8) |
83                                  ((uint32_t)(255 * r + 0.5f) << 0);
84                         break;
85                 case FORMAT_X8R8G8B8:
86                         if((rgbaMask & 0x7) != 0x7) return false;
87                         packed = ((uint32_t)(255) << 24) |
88                                  ((uint32_t)(255 * r + 0.5f) << 16) |
89                                  ((uint32_t)(255 * g + 0.5f) << 8) |
90                                  ((uint32_t)(255 * b + 0.5f) << 0);
91                         break;
92                 case FORMAT_A8R8G8B8:
93                         if((rgbaMask & 0xF) != 0xF) return false;
94                         packed = ((uint32_t)(255 * a + 0.5f) << 24) |
95                                  ((uint32_t)(255 * r + 0.5f) << 16) |
96                                  ((uint32_t)(255 * g + 0.5f) << 8) |
97                                  ((uint32_t)(255 * b + 0.5f) << 0);
98                         break;
99                 default:
100                         return false;
101                 }
102
103                 uint8_t *d = (uint8_t*)dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
104
105                 switch(Surface::bytes(dest->getFormat()))
106                 {
107                 case 2:
108                         for(int i = dRect.y0; i < dRect.y1; i++)
109                         {
110                                 sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0);
111                                 d += dest->getInternalPitchB();
112                         }
113                         break;
114                 case 4:
115                         for(int i = dRect.y0; i < dRect.y1; i++)
116                         {
117                                 sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0);
118                                 d += dest->getInternalPitchB();
119                         }
120                         break;
121                 default:
122                         assert(false);
123                 }
124
125                 dest->unlockInternal();
126
127                 return true;
128         }
129
130         void Blitter::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
131         {
132                 Blitter::Options options = WRITE_RGBA;
133                 if(filter)
134                 {
135                         options = static_cast<Blitter::Options>(options | FILTER_LINEAR);
136                 }
137                 if(isStencil)
138                 {
139                         options = static_cast<Blitter::Options>(options | USE_STENCIL);
140                 }
141                 blit(source, sRect, dest, dRect, options);
142         }
143
144         void Blitter::blit(Surface *source, const SliceRect &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
145         {
146                 if(dest->getInternalFormat() == FORMAT_NULL)
147                 {
148                         return;
149                 }
150
151                 if(blitReactor(source, sourceRect, dest, destRect, options))
152                 {
153                         return;
154                 }
155
156                 SliceRect sRect = sourceRect;
157                 SliceRect dRect = destRect;
158
159                 bool flipX = destRect.x0 > destRect.x1;
160                 bool flipY = destRect.y0 > destRect.y1;
161
162                 if(flipX)
163                 {
164                         swap(dRect.x0, dRect.x1);
165                         swap(sRect.x0, sRect.x1);
166                 }
167                 if(flipY)
168                 {
169                         swap(dRect.y0, dRect.y1);
170                         swap(sRect.y0, sRect.y1);
171                 }
172
173                 source->lockInternal(sRect.x0, sRect.y0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC);
174                 dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
175
176                 float w = static_cast<float>(sRect.x1 - sRect.x0) / static_cast<float>(dRect.x1 - dRect.x0);
177                 float h = static_cast<float>(sRect.y1 - sRect.y0) / static_cast<float>(dRect.y1 - dRect.y0);
178
179                 const float xStart = (float)sRect.x0 + 0.5f * w;
180                 float y = (float)sRect.y0 + 0.5f * h;
181
182                 for(int j = dRect.y0; j < dRect.y1; j++)
183                 {
184                         float x = xStart;
185
186                         for(int i = dRect.x0; i < dRect.x1; i++)
187                         {
188                                 // FIXME: Support RGBA mask
189                                 dest->copyInternal(source, i, j, x, y, (options & FILTER_LINEAR) == FILTER_LINEAR);
190
191                                 x += w;
192                         }
193
194                         y += h;
195                 }
196
197                 source->unlockInternal();
198                 dest->unlockInternal();
199         }
200
201         void Blitter::blit3D(Surface *source, Surface *dest)
202         {
203                 source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
204                 dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
205
206                 float w = static_cast<float>(source->getWidth())  / static_cast<float>(dest->getWidth());
207                 float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight());
208                 float d = static_cast<float>(source->getDepth())  / static_cast<float>(dest->getDepth());
209
210                 float z = 0.5f * d;
211                 for(int k = 0; k < dest->getDepth(); ++k)
212                 {
213                         float y = 0.5f * h;
214                         for(int j = 0; j < dest->getHeight(); ++j)
215                         {
216                                 float x = 0.5f * w;
217                                 for(int i = 0; i < dest->getWidth(); ++i)
218                                 {
219                                         dest->copyInternal(source, i, j, k, x, y, z, true);
220                                         x += w;
221                                 }
222                                 y += h;
223                         }
224                         z += d;
225                 }
226
227                 source->unlockInternal();
228                 dest->unlockInternal();
229         }
230
231         bool Blitter::read(Float4 &c, Pointer<Byte> element, Format format)
232         {
233                 c = Float4(0.0f, 0.0f, 0.0f, 1.0f);
234
235                 switch(format)
236                 {
237                 case FORMAT_L8:
238                         c.xyz = Float(Int(*Pointer<Byte>(element)));
239                         c.w = float(0xFF);
240                         break;
241                 case FORMAT_A8:
242                         c.w = Float(Int(*Pointer<Byte>(element)));
243                         break;
244                 case FORMAT_R8I:
245                 case FORMAT_R8I_SNORM:
246                         c.x = Float(Int(*Pointer<SByte>(element)));
247                         c.w = float(0x7F);
248                         break;
249                 case FORMAT_R8:
250                 case FORMAT_R8UI:
251                         c.x = Float(Int(*Pointer<Byte>(element)));
252                         c.w = float(0xFF);
253                         break;
254                 case FORMAT_R16I:
255                         c.x = Float(Int(*Pointer<Short>(element)));
256                         c.w = float(0x7FFF);
257                         break;
258                 case FORMAT_R16UI:
259                         c.x = Float(Int(*Pointer<UShort>(element)));
260                         c.w = float(0xFFFF);
261                         break;
262                 case FORMAT_R32I:
263                         c.x = Float(Int(*Pointer<Int>(element)));
264                         c.w = float(0x7FFFFFFF);
265                         break;
266                 case FORMAT_R32UI:
267                         c.x = Float(Int(*Pointer<UInt>(element)));
268                         c.w = float(0xFFFFFFFF);
269                         break;
270                 case FORMAT_A8R8G8B8:
271                         c = Float4(*Pointer<Byte4>(element)).zyxw;
272                         break;
273                 case FORMAT_A8B8G8R8I:
274                 case FORMAT_A8B8G8R8I_SNORM:
275                         c = Float4(*Pointer<SByte4>(element));
276                         break;
277                 case FORMAT_A8B8G8R8:
278                 case FORMAT_A8B8G8R8UI:
279                 case FORMAT_SRGB8_A8:
280                         c = Float4(*Pointer<Byte4>(element));
281                         break;
282                 case FORMAT_X8R8G8B8:
283                         c = Float4(*Pointer<Byte4>(element)).zyxw;
284                         c.w = float(0xFF);
285                         break;
286                 case FORMAT_R8G8B8:
287                         c.z = Float(Int(*Pointer<Byte>(element + 0)));
288                         c.y = Float(Int(*Pointer<Byte>(element + 1)));
289                         c.x = Float(Int(*Pointer<Byte>(element + 2)));
290                         c.w = float(0xFF);
291                         break;
292                 case FORMAT_B8G8R8:
293                         c.x = Float(Int(*Pointer<Byte>(element + 0)));
294                         c.y = Float(Int(*Pointer<Byte>(element + 1)));
295                         c.z = Float(Int(*Pointer<Byte>(element + 2)));
296                         c.w = float(0xFF);
297                         break;
298                 case FORMAT_X8B8G8R8I:
299                 case FORMAT_X8B8G8R8I_SNORM:
300                         c = Float4(*Pointer<SByte4>(element));
301                         c.w = float(0x7F);
302                         break;
303                 case FORMAT_X8B8G8R8:
304                 case FORMAT_X8B8G8R8UI:
305                 case FORMAT_SRGB8_X8:
306                         c = Float4(*Pointer<Byte4>(element));
307                         c.w = float(0xFF);
308                         break;
309                 case FORMAT_A16B16G16R16I:
310                         c = Float4(*Pointer<Short4>(element));
311                         break;
312                 case FORMAT_A16B16G16R16:
313                 case FORMAT_A16B16G16R16UI:
314                         c = Float4(*Pointer<UShort4>(element));
315                         break;
316                 case FORMAT_X16B16G16R16I:
317                         c = Float4(*Pointer<Short4>(element));
318                         c.w = float(0x7FFF);
319                         break;
320                 case FORMAT_X16B16G16R16UI:
321                         c = Float4(*Pointer<UShort4>(element));
322                         c.w = float(0xFFFF);
323                         break;
324                 case FORMAT_A32B32G32R32I:
325                         c = Float4(*Pointer<Int4>(element));
326                         break;
327                 case FORMAT_A32B32G32R32UI:
328                         c = Float4(*Pointer<UInt4>(element));
329                         break;
330                 case FORMAT_X32B32G32R32I:
331                         c = Float4(*Pointer<Int4>(element));
332                         c.w = float(0x7FFFFFFF);
333                         break;
334                 case FORMAT_X32B32G32R32UI:
335                         c = Float4(*Pointer<UInt4>(element));
336                         c.w = float(0xFFFFFFFF);
337                         break;
338                 case FORMAT_G8R8I:
339                 case FORMAT_G8R8I_SNORM:
340                         c.x = Float(Int(*Pointer<SByte>(element + 0)));
341                         c.y = Float(Int(*Pointer<SByte>(element + 1)));
342                         c.w = float(0x7F);
343                         break;
344                 case FORMAT_G8R8:
345                 case FORMAT_G8R8UI:
346                         c.x = Float(Int(*Pointer<Byte>(element + 0)));
347                         c.y = Float(Int(*Pointer<Byte>(element + 1)));
348                         c.w = float(0xFF);
349                         break;
350                 case FORMAT_G16R16I:
351                         c.x = Float(Int(*Pointer<Short>(element + 0)));
352                         c.y = Float(Int(*Pointer<Short>(element + 2)));
353                         c.w = float(0x7FFF);
354                         break;
355                 case FORMAT_G16R16:
356                 case FORMAT_G16R16UI:
357                         c.x = Float(Int(*Pointer<UShort>(element + 0)));
358                         c.y = Float(Int(*Pointer<UShort>(element + 2)));
359                         c.w = float(0xFFFF);
360                         break;
361                 case FORMAT_G32R32I:
362                         c.x = Float(Int(*Pointer<Int>(element + 0)));
363                         c.y = Float(Int(*Pointer<Int>(element + 4)));
364                         c.w = float(0x7FFFFFFF);
365                         break;
366                 case FORMAT_G32R32UI:
367                         c.x = Float(Int(*Pointer<UInt>(element + 0)));
368                         c.y = Float(Int(*Pointer<UInt>(element + 4)));
369                         c.w = float(0xFFFFFFFF);
370                         break;
371                 case FORMAT_A32B32G32R32F:
372                         c = *Pointer<Float4>(element);
373                         break;
374                 case FORMAT_X32B32G32R32F:
375                 case FORMAT_B32G32R32F:
376                         c.z = *Pointer<Float>(element + 8);
377                 case FORMAT_G32R32F:
378                         c.x = *Pointer<Float>(element + 0);
379                         c.y = *Pointer<Float>(element + 4);
380                         break;
381                 case FORMAT_R32F:
382                         c.x = *Pointer<Float>(element);
383                         break;
384                 case FORMAT_R5G6B5:
385                         c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
386                         c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
387                         c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
388                         break;
389                 case FORMAT_A2B10G10R10:
390                         c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
391                         c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
392                         c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
393                         c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
394                         break;
395                 case FORMAT_D16:
396                         c.x = Float(Int((*Pointer<UShort>(element))));
397                         break;
398                 case FORMAT_D24S8:
399                         c.x = Float(Int((*Pointer<UInt>(element))));
400                         break;
401                 case FORMAT_D32:
402                         c.x = Float(Int((*Pointer<UInt>(element))));
403                         break;
404                 case FORMAT_D32F:
405                         c.x = *Pointer<Float>(element);
406                         break;
407                 case FORMAT_D32F_COMPLEMENTARY:
408                         c.x = 1.0f - *Pointer<Float>(element);
409                         break;
410                 case FORMAT_D32F_LOCKABLE:
411                         c.x = *Pointer<Float>(element);
412                         break;
413                 case FORMAT_D32FS8_TEXTURE:
414                         c.x = *Pointer<Float>(element);
415                         break;
416                 case FORMAT_D32FS8_SHADOW:
417                         c.x = *Pointer<Float>(element);
418                         break;
419                 case FORMAT_S8:
420                         c.x = Float(Int(*Pointer<Byte>(element)));
421                         break;
422                 default:
423                         return false;
424                 }
425
426                 return true;
427         }
428
429         bool Blitter::write(Float4 &c, Pointer<Byte> element, Format format, const Blitter::Options& options)
430         {
431                 bool writeR = (options & WRITE_RED) == WRITE_RED;
432                 bool writeG = (options & WRITE_GREEN) == WRITE_GREEN;
433                 bool writeB = (options & WRITE_BLUE) == WRITE_BLUE;
434                 bool writeA = (options & WRITE_ALPHA) == WRITE_ALPHA;
435                 bool writeRGBA = writeR && writeG && writeB && writeA;
436
437                 switch(format)
438                 {
439                 case FORMAT_L8:
440                         *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
441                         break;
442                 case FORMAT_A8:
443                         if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); }
444                         break;
445                 case FORMAT_A8R8G8B8:
446                         if(writeRGBA)
447                         {
448                                 UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
449                                 *Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
450                         }
451                         else
452                         {
453                                 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
454                                 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
455                                 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
456                                 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
457                         }
458                         break;
459                 case FORMAT_A8B8G8R8:
460                 case FORMAT_SRGB8_A8:
461                         if(writeRGBA)
462                         {
463                                 UShort4 c0 = As<UShort4>(RoundShort4(c));
464                                 *Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
465                         }
466                         else
467                         {
468                                 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
469                                 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
470                                 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
471                                 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
472                         }
473                         break;
474                 case FORMAT_X8R8G8B8:
475                         if(writeRGBA)
476                         {
477                                 UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw)) | UShort4(0x0000, 0x0000, 0x0000, 0xFFFFu);
478                                 *Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
479                         }
480                         else
481                         {
482                                 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
483                                 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
484                                 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
485                                 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
486                         }
487                         break;
488                 case FORMAT_X8B8G8R8:
489                 case FORMAT_SRGB8_X8:
490                         if(writeRGBA)
491                         {
492                                 UShort4 c0 = As<UShort4>(RoundShort4(c)) | UShort4(0x0000, 0x0000, 0x0000, 0xFFFFu);
493                                 *Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
494                         }
495                         else
496                         {
497                                 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
498                                 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
499                                 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
500                                 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
501                         }
502                         break;
503                 case FORMAT_R8G8B8:
504                         if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
505                         if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
506                         if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
507                         break;
508                 case FORMAT_B8G8R8:
509                         if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
510                         if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
511                         if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
512                         break;
513                 case FORMAT_A32B32G32R32F:
514                         if(writeRGBA)
515                         {
516                                 *Pointer<Float4>(element) = c;
517                         }
518                         else
519                         {
520                                 if(writeR) { *Pointer<Float>(element) = c.x; }
521                                 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
522                                 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
523                                 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
524                         }
525                         break;
526                 case FORMAT_X32B32G32R32F:
527                         if(writeA) { *Pointer<Float>(element + 12) = 1.0f; }
528                 case FORMAT_B32G32R32F:
529                         if(writeR) { *Pointer<Float>(element) = c.x; }
530                         if(writeG) { *Pointer<Float>(element + 4) = c.y; }
531                         if(writeB) { *Pointer<Float>(element + 8) = c.z; }
532                         break;
533                 case FORMAT_G32R32F:
534                         if(writeR && writeG)
535                         {
536                                 *Pointer<Float2>(element) = Float2(c);
537                         }
538                         else
539                         {
540                                 if(writeR) { *Pointer<Float>(element) = c.x; }
541                                 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
542                         }
543                         break;
544                 case FORMAT_R32F:
545                         if(writeR) { *Pointer<Float>(element) = c.x; }
546                         break;
547                 case FORMAT_A8B8G8R8I:
548                 case FORMAT_A8B8G8R8I_SNORM:
549                         if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
550                 case FORMAT_X8B8G8R8I:
551                 case FORMAT_X8B8G8R8I_SNORM:
552                         if(writeA && (format == FORMAT_X8B8G8R8I || format == FORMAT_X8B8G8R8I_SNORM))
553                         {
554                                 *Pointer<SByte>(element + 3) = SByte(0x7F);
555                         }
556                         if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
557                 case FORMAT_G8R8I:
558                 case FORMAT_G8R8I_SNORM:
559                         if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
560                 case FORMAT_R8I:
561                 case FORMAT_R8I_SNORM:
562                         if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
563                         break;
564                 case FORMAT_A8B8G8R8UI:
565                         if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
566                 case FORMAT_X8B8G8R8UI:
567                         if(writeA && (format == FORMAT_X8B8G8R8UI))
568                         {
569                                 *Pointer<Byte>(element + 3) = Byte(0xFF);
570                         }
571                         if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
572                 case FORMAT_G8R8UI:
573                 case FORMAT_G8R8:
574                         if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
575                 case FORMAT_R8UI:
576                 case FORMAT_R8:
577                         if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
578                         break;
579                 case FORMAT_A16B16G16R16I:
580                         if(writeRGBA)
581                         {
582                                 *Pointer<Short4>(element) = Short4(RoundInt(c));
583                         }
584                         else
585                         {
586                                 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
587                                 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
588                                 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
589                                 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
590                         }
591                         break;
592                 case FORMAT_X16B16G16R16I:
593                         if(writeRGBA)
594                         {
595                                 *Pointer<Short4>(element) = Short4(RoundInt(c));
596                         }
597                         else
598                         {
599                                 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
600                                 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
601                                 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
602                         }
603                         if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); }
604                         break;
605                 case FORMAT_G16R16I:
606                         if(writeR && writeG)
607                         {
608                                 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
609                         }
610                         else
611                         {
612                                 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
613                                 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
614                         }
615                         break;
616                 case FORMAT_R16I:
617                         if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
618                         break;
619                 case FORMAT_A16B16G16R16UI:
620                 case FORMAT_A16B16G16R16:
621                         if(writeRGBA)
622                         {
623                                 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
624                         }
625                         else
626                         {
627                                 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
628                                 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
629                                 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
630                                 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
631                         }
632                         break;
633                 case FORMAT_X16B16G16R16UI:
634                         if(writeRGBA)
635                         {
636                                 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
637                         }
638                         else
639                         {
640                                 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
641                                 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
642                                 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
643                         }
644                         if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); }
645                         break;
646                 case FORMAT_G16R16UI:
647                 case FORMAT_G16R16:
648                         if(writeR && writeG)
649                         {
650                                 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
651                         }
652                         else
653                         {
654                                 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
655                                 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
656                         }
657                         break;
658                 case FORMAT_R16UI:
659                         if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
660                         break;
661                 case FORMAT_A32B32G32R32I:
662                         if(writeRGBA)
663                         {
664                                 *Pointer<Int4>(element) = RoundInt(c);
665                         }
666                         else
667                         {
668                                 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
669                                 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
670                                 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
671                                 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
672                         }
673                         break;
674                 case FORMAT_X32B32G32R32I:
675                         if(writeRGBA)
676                         {
677                                 *Pointer<Int4>(element) = RoundInt(c);
678                         }
679                         else
680                         {
681                                 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
682                                 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
683                                 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
684                         }
685                         if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
686                         break;
687                 case FORMAT_G32R32I:
688                         if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
689                 case FORMAT_R32I:
690                         if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
691                         break;
692                 case FORMAT_A32B32G32R32UI:
693                         if(writeRGBA)
694                         {
695                                 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
696                         }
697                         else
698                         {
699                                 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
700                                 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
701                                 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
702                                 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
703                         }
704                         break;
705                 case FORMAT_X32B32G32R32UI:
706                         if(writeRGBA)
707                         {
708                                 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
709                         }
710                         else
711                         {
712                                 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
713                                 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
714                                 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
715                         }
716                         if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); }
717                         break;
718                 case FORMAT_G32R32UI:
719                         if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
720                 case FORMAT_R32UI:
721                         if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
722                         break;
723                 case FORMAT_R5G6B5:
724                         if(writeR && writeG && writeB)
725                         {
726                                 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
727                                                                   (RoundInt(Float(c.y)) << Int(5)) |
728                                                                   (RoundInt(Float(c.x)) << Int(11)));
729                         }
730                         else
731                         {
732                                 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
733                                 unsigned short unmask = ~mask;
734                                 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
735                                                             (UShort(RoundInt(Float(c.z)) |
736                                                                    (RoundInt(Float(c.y)) << Int(5)) |
737                                                                    (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
738                         }
739                         break;
740                 case FORMAT_A2B10G10R10:
741                         if(writeRGBA)
742                         {
743                                 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
744                                                               (RoundInt(Float(c.y)) << 10) |
745                                                               (RoundInt(Float(c.z)) << 20) |
746                                                               (RoundInt(Float(c.w)) << 30));
747                         }
748                         else
749                         {
750                                 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
751                                                     (writeB ? 0x3FF00000 : 0x0000) |
752                                                     (writeG ? 0x000FFC00 : 0x0000) |
753                                                     (writeR ? 0x000003FF : 0x0000);
754                                 unsigned int unmask = ~mask;
755                                 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
756                                                             (UInt(RoundInt(Float(c.x)) |
757                                                                   (RoundInt(Float(c.y)) << 10) |
758                                                                   (RoundInt(Float(c.z)) << 20) |
759                                                                   (RoundInt(Float(c.w)) << 30)) & UInt(mask));
760                         }
761                         break;
762                 case FORMAT_D16:
763                         *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
764                         break;
765                 case FORMAT_D24S8:
766                         *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
767                         break;
768                 case FORMAT_D32:
769                         *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
770                         break;
771                 case FORMAT_D32F:
772                         *Pointer<Float>(element) = c.x;
773                         break;
774                 case FORMAT_D32F_COMPLEMENTARY:
775                         *Pointer<Float>(element) = 1.0f - c.x;
776                         break;
777                 case FORMAT_D32F_LOCKABLE:
778                         *Pointer<Float>(element) = c.x;
779                         break;
780                 case FORMAT_D32FS8_TEXTURE:
781                         *Pointer<Float>(element) = c.x;
782                         break;
783                 case FORMAT_D32FS8_SHADOW:
784                         *Pointer<Float>(element) = c.x;
785                         break;
786                 case FORMAT_S8:
787                         *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
788                         break;
789                 default:
790                         return false;
791                 }
792                 return true;
793         }
794
795         bool Blitter::read(Int4 &c, Pointer<Byte> element, Format format)
796         {
797                 c = Int4(0, 0, 0, 1);
798
799                 switch(format)
800                 {
801                 case FORMAT_A8B8G8R8I:
802                         c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
803                 case FORMAT_X8B8G8R8I:
804                         c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
805                 case FORMAT_G8R8I:
806                         c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
807                 case FORMAT_R8I:
808                         c = Insert(c, Int(*Pointer<SByte>(element)), 0);
809                         break;
810                 case FORMAT_A8B8G8R8UI:
811                         c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
812                 case FORMAT_X8B8G8R8UI:
813                         c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
814                 case FORMAT_G8R8UI:
815                         c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
816                 case FORMAT_R8UI:
817                         c = Insert(c, Int(*Pointer<Byte>(element)), 0);
818                         break;
819                 case FORMAT_A16B16G16R16I:
820                         c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
821                 case FORMAT_X16B16G16R16I:
822                         c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
823                 case FORMAT_G16R16I:
824                         c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
825                 case FORMAT_R16I:
826                         c = Insert(c, Int(*Pointer<Short>(element)), 0);
827                         break;
828                 case FORMAT_A16B16G16R16UI:
829                         c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
830                 case FORMAT_X16B16G16R16UI:
831                         c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
832                 case FORMAT_G16R16UI:
833                         c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
834                 case FORMAT_R16UI:
835                         c = Insert(c, Int(*Pointer<UShort>(element)), 0);
836                         break;
837                 case FORMAT_A32B32G32R32I:
838                         c = *Pointer<Int4>(element);
839                         break;
840                 case FORMAT_X32B32G32R32I:
841                         c = Insert(c, *Pointer<Int>(element + 8), 2);
842                 case FORMAT_G32R32I:
843                         c = Insert(c, *Pointer<Int>(element + 4), 1);
844                 case FORMAT_R32I:
845                         c = Insert(c, *Pointer<Int>(element), 0);
846                         break;
847                 case FORMAT_A32B32G32R32UI:
848                         c = *Pointer<UInt4>(element);
849                         break;
850                 case FORMAT_X32B32G32R32UI:
851                         c = Insert(c, Int(*Pointer<UInt>(element + 8)), 2);
852                 case FORMAT_G32R32UI:
853                         c = Insert(c, Int(*Pointer<UInt>(element + 4)), 1);
854                 case FORMAT_R32UI:
855                         c = Insert(c, Int(*Pointer<UInt>(element)), 0);
856                         break;
857                 default:
858                         return false;
859                 }
860
861                 return true;
862         }
863
864         bool Blitter::write(Int4 &c, Pointer<Byte> element, Format format, const Blitter::Options& options)
865         {
866                 bool writeR = (options & WRITE_RED) == WRITE_RED;
867                 bool writeG = (options & WRITE_GREEN) == WRITE_GREEN;
868                 bool writeB = (options & WRITE_BLUE) == WRITE_BLUE;
869                 bool writeA = (options & WRITE_ALPHA) == WRITE_ALPHA;
870                 bool writeRGBA = writeR && writeG && writeB && writeA;
871
872                 switch(format)
873                 {
874                 case FORMAT_A8B8G8R8I:
875                         if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
876                 case FORMAT_X8B8G8R8I:
877                         if(writeA && (format != FORMAT_A8B8G8R8I))
878                         {
879                                 *Pointer<SByte>(element + 3) = SByte(0x7F);
880                         }
881                         if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
882                 case FORMAT_G8R8I:
883                         if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
884                 case FORMAT_R8I:
885                         if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
886                         break;
887                 case FORMAT_A8B8G8R8UI:
888                         if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
889                 case FORMAT_X8B8G8R8UI:
890                         if(writeA && (format != FORMAT_A8B8G8R8UI))
891                         {
892                                 *Pointer<Byte>(element + 3) = Byte(0xFF);
893                         }
894                         if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
895                 case FORMAT_G8R8UI:
896                         if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
897                 case FORMAT_R8UI:
898                         if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
899                         break;
900                 case FORMAT_A16B16G16R16I:
901                         if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
902                 case FORMAT_X16B16G16R16I:
903                         if(writeA && (format != FORMAT_A16B16G16R16I))
904                         {
905                                 *Pointer<Short>(element + 6) = Short(0x7FFF);
906                         }
907                         if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
908                 case FORMAT_G16R16I:
909                         if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
910                 case FORMAT_R16I:
911                         if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
912                         break;
913                 case FORMAT_A16B16G16R16UI:
914                         if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
915                 case FORMAT_X16B16G16R16UI:
916                         if(writeA && (format != FORMAT_A16B16G16R16UI))
917                         {
918                                 *Pointer<UShort>(element + 6) = UShort(0xFFFF);
919                         }
920                         if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
921                 case FORMAT_G16R16UI:
922                         if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
923                 case FORMAT_R16UI:
924                         if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
925                         break;
926                 case FORMAT_A32B32G32R32I:
927                         if(writeRGBA)
928                         {
929                                 *Pointer<Int4>(element) = c;
930                         }
931                         else
932                         {
933                                 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
934                                 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
935                                 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
936                                 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
937                         }
938                         break;
939                 case FORMAT_X32B32G32R32I:
940                         if(writeRGBA)
941                         {
942                                 *Pointer<Int4>(element) = c;
943                         }
944                         else
945                         {
946                                 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
947                                 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
948                                 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
949                         }
950                         if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
951                         break;
952                 case FORMAT_G32R32I:
953                         if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
954                         if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
955                         break;
956                 case FORMAT_R32I:
957                         if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
958                         break;
959                 case FORMAT_A32B32G32R32UI:
960                         if(writeRGBA)
961                         {
962                                 *Pointer<UInt4>(element) = As<UInt4>(c);
963                         }
964                         else
965                         {
966                                 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
967                                 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
968                                 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
969                                 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
970                         }
971                         break;
972                 case FORMAT_X32B32G32R32UI:
973                         if(writeRGBA)
974                         {
975                                 *Pointer<UInt4>(element) = As<UInt4>(c);
976                         }
977                         else
978                         {
979                                 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
980                                 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
981                                 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
982                         }
983                         if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); }
984                         break;
985                 case FORMAT_G32R32UI:
986                         if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
987                         if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
988                         break;
989                 case FORMAT_R32UI:
990                         if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
991                         break;
992                 default:
993                         return false;
994                 }
995
996                 return true;
997         }
998
999         bool Blitter::GetScale(float4& scale, Format format)
1000         {
1001                 switch(format)
1002                 {
1003                 case FORMAT_L8:
1004                 case FORMAT_A8:
1005                 case FORMAT_A8R8G8B8:
1006                 case FORMAT_X8R8G8B8:
1007                 case FORMAT_R8:
1008                 case FORMAT_G8R8:
1009                 case FORMAT_R8G8B8:
1010                 case FORMAT_B8G8R8:
1011                 case FORMAT_X8B8G8R8:
1012                 case FORMAT_A8B8G8R8:
1013                 case FORMAT_SRGB8_X8:
1014                 case FORMAT_SRGB8_A8:
1015                         scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
1016                         break;
1017                 case FORMAT_R8I_SNORM:
1018                 case FORMAT_G8R8I_SNORM:
1019                 case FORMAT_X8B8G8R8I_SNORM:
1020                 case FORMAT_A8B8G8R8I_SNORM:
1021                         scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
1022                         break;
1023                 case FORMAT_A16B16G16R16:
1024                         scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
1025                         break;
1026                 case FORMAT_R8I:
1027                 case FORMAT_R8UI:
1028                 case FORMAT_G8R8I:
1029                 case FORMAT_G8R8UI:
1030                 case FORMAT_X8B8G8R8I:
1031                 case FORMAT_X8B8G8R8UI:
1032                 case FORMAT_A8B8G8R8I:
1033                 case FORMAT_A8B8G8R8UI:
1034                 case FORMAT_R16I:
1035                 case FORMAT_R16UI:
1036                 case FORMAT_G16R16:
1037                 case FORMAT_G16R16I:
1038                 case FORMAT_G16R16UI:
1039                 case FORMAT_X16B16G16R16I:
1040                 case FORMAT_X16B16G16R16UI:
1041                 case FORMAT_A16B16G16R16I:
1042                 case FORMAT_A16B16G16R16UI:
1043                 case FORMAT_R32I:
1044                 case FORMAT_R32UI:
1045                 case FORMAT_G32R32I:
1046                 case FORMAT_G32R32UI:
1047                 case FORMAT_X32B32G32R32I:
1048                 case FORMAT_X32B32G32R32UI:
1049                 case FORMAT_A32B32G32R32I:
1050                 case FORMAT_A32B32G32R32UI:
1051                 case FORMAT_A32B32G32R32F:
1052                 case FORMAT_X32B32G32R32F:
1053                 case FORMAT_B32G32R32F:
1054                 case FORMAT_G32R32F:
1055                 case FORMAT_R32F:
1056                         scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1057                         break;
1058                 case FORMAT_R5G6B5:
1059                         scale = vector(0x1F, 0x3F, 0x1F, 1.0f);
1060                         break;
1061                 case FORMAT_A2B10G10R10:
1062                         scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03);
1063                         break;
1064                 case FORMAT_D16:
1065                         scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f);
1066                         break;
1067                 case FORMAT_D24S8:
1068                         scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f);
1069                         break;
1070                 case FORMAT_D32:
1071                         scale = vector(static_cast<float>(0xFFFFFFFF), 0.0f, 0.0f, 0.0f);
1072                         break;
1073                 case FORMAT_D32F:
1074                 case FORMAT_D32F_COMPLEMENTARY:
1075                 case FORMAT_D32F_LOCKABLE:
1076                 case FORMAT_D32FS8_TEXTURE:
1077                 case FORMAT_D32FS8_SHADOW:
1078                 case FORMAT_S8:
1079                         scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1080                         break;
1081                 default:
1082                         return false;
1083                 }
1084
1085                 return true;
1086         }
1087
1088         bool Blitter::ApplyScaleAndClamp(Float4& value, const BlitState& state)
1089         {
1090                 float4 scale, unscale;
1091                 if(Surface::isNonNormalizedInteger(state.sourceFormat) &&
1092                    !Surface::isNonNormalizedInteger(state.destFormat) &&
1093                    (state.options & CLEAR_OPERATION))
1094                 {
1095                         // If we're clearing a buffer from an int or uint color into a normalized color,
1096                         // then the whole range of the int or uint color must be scaled between 0 and 1.
1097                         switch(state.sourceFormat)
1098                         {
1099                         case FORMAT_A32B32G32R32I:
1100                                 unscale = replicate(static_cast<float>(0x7FFFFFFF));
1101                                 break;
1102                         case FORMAT_A32B32G32R32UI:
1103                                 unscale = replicate(static_cast<float>(0xFFFFFFFF));
1104                                 break;
1105                         default:
1106                                 return false;
1107                         }
1108                 }
1109                 else if(!GetScale(unscale, state.sourceFormat))
1110                 {
1111                         return false;
1112                 }
1113
1114                 if(!GetScale(scale, state.destFormat))
1115                 {
1116                         return false;
1117                 }
1118
1119                 if(unscale != scale)
1120                 {
1121                         value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1122                 }
1123
1124                 if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
1125                 {
1126                         value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1127
1128                         value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x,
1129                                                   Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y,
1130                                                   Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z,
1131                                                   Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w));
1132                 }
1133
1134                 return true;
1135         }
1136
1137         Int Blitter::ComputeOffset(Int& x, Int& y, Int& pitchB, int bytes, bool quadLayout)
1138         {
1139                 return (quadLayout ? (y & Int(~1)) : RValue<Int>(y)) * pitchB +
1140                        (quadLayout ? ((y & Int(1)) << 1) + (x * 2) - (x & Int(1)) : RValue<Int>(x)) * bytes;
1141         }
1142
1143         Routine *Blitter::generate(BlitState &state)
1144         {
1145                 Function<Void(Pointer<Byte>)> function;
1146                 {
1147                         Pointer<Byte> blit(function.Arg<0>());
1148
1149                         Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
1150                         Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
1151                         Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
1152                         Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
1153
1154                         Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
1155                         Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
1156                         Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
1157                         Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));
1158
1159                         Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
1160                         Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
1161                         Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
1162                         Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));
1163
1164                         Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
1165                         Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));
1166
1167                         bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
1168                         bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
1169                         bool intBoth = intSrc && intDst;
1170                         bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat);
1171                         bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat);
1172                         int srcBytes = Surface::bytes(state.sourceFormat);
1173                         int dstBytes = Surface::bytes(state.destFormat);
1174
1175                         bool hasConstantColorI = false;
1176                         Int4 constantColorI;
1177                         bool hasConstantColorF = false;
1178                         Float4 constantColorF;
1179                         if(state.options & CLEAR_OPERATION)
1180                         {
1181                                 if(intBoth) // Integer types
1182                                 {
1183                                         if(!read(constantColorI, source, state.sourceFormat))
1184                                         {
1185                                                 return nullptr;
1186                                         }
1187                                         hasConstantColorI = true;
1188                                 }
1189                                 else
1190                                 {
1191                                         if(!read(constantColorF, source, state.sourceFormat))
1192                                         {
1193                                                 return nullptr;
1194                                         }
1195                                         hasConstantColorF = true;
1196
1197                                         if(!ApplyScaleAndClamp(constantColorF, state))
1198                                         {
1199                                                 return nullptr;
1200                                         }
1201                                 }
1202                         }
1203
1204                         Float y = y0;
1205
1206                         For(Int j = y0d, j < y1d, j++)
1207                         {
1208                                 Float x = x0;
1209                                 Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB;
1210
1211                                 For(Int i = x0d, i < x1d, i++)
1212                                 {
1213                                         Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes;
1214                                         if(hasConstantColorI)
1215                                         {
1216                                                 if(!write(constantColorI, d, state.destFormat, state.options))
1217                                                 {
1218                                                         return nullptr;
1219                                                 }
1220                                         }
1221                                         else if(hasConstantColorF)
1222                                         {
1223                                                 if(!write(constantColorF, d, state.destFormat, state.options))
1224                                                 {
1225                                                         return nullptr;
1226                                                 }
1227                                         }
1228                                         else if(intBoth) // Integer types do not support filtering
1229                                         {
1230                                                 Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
1231                                                 Int X = Int(x);
1232                                                 Int Y = Int(y);
1233
1234                                                 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1235
1236                                                 if(!read(color, s, state.sourceFormat))
1237                                                 {
1238                                                         return nullptr;
1239                                                 }
1240
1241                                                 if(!write(color, d, state.destFormat, state.options))
1242                                                 {
1243                                                         return nullptr;
1244                                                 }
1245                                         }
1246                                         else
1247                                         {
1248                                                 Float4 color;
1249
1250                                                 if(!(state.options & FILTER_LINEAR) || intSrc)
1251                                                 {
1252                                                         Int X = Int(x);
1253                                                         Int Y = Int(y);
1254
1255                                                         Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1256
1257                                                         if(!read(color, s, state.sourceFormat))
1258                                                         {
1259                                                                 return nullptr;
1260                                                         }
1261                                                 }
1262                                                 else   // Bilinear filtering
1263                                                 {
1264                                                         Float x0 = x - 0.5f;
1265                                                         Float y0 = y - 0.5f;
1266
1267                                                         Int X0 = Max(Int(x0), 0);
1268                                                         Int Y0 = Max(Int(y0), 0);
1269
1270                                                         Int X1 = IfThenElse(X0 + 1 >= sWidth, X0, X0 + 1);
1271                                                         Int Y1 = IfThenElse(Y0 + 1 >= sHeight, Y0, Y0 + 1);
1272
1273                                                         Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout);
1274                                                         Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout);
1275                                                         Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout);
1276                                                         Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout);
1277
1278                                                         Float4 c00; if(!read(c00, s00, state.sourceFormat)) return nullptr;
1279                                                         Float4 c01; if(!read(c01, s01, state.sourceFormat)) return nullptr;
1280                                                         Float4 c10; if(!read(c10, s10, state.sourceFormat)) return nullptr;
1281                                                         Float4 c11; if(!read(c11, s11, state.sourceFormat)) return nullptr;
1282
1283                                                         Float4 fx = Float4(x0 - Float(X0));
1284                                                         Float4 fy = Float4(y0 - Float(Y0));
1285
1286                                                         color = c00 * (Float4(1.0f) - fx) * (Float4(1.0f) - fy) +
1287                                                                 c01 * fx * (Float4(1.0f) - fy) +
1288                                                                 c10 * (Float4(1.0f) - fx) * fy +
1289                                                                 c11 * fx * fy;
1290                                                 }
1291
1292                                                 if(!ApplyScaleAndClamp(color, state) || !write(color, d, state.destFormat, state.options))
1293                                                 {
1294                                                         return nullptr;
1295                                                 }
1296                                         }
1297
1298                                         if(!hasConstantColorI && !hasConstantColorF) { x += w; }
1299                                 }
1300
1301                                 if(!hasConstantColorI && !hasConstantColorF) { y += h; }
1302                         }
1303                 }
1304
1305                 return function(L"BlitRoutine");
1306         }
1307
1308         bool Blitter::blitReactor(Surface *source, const SliceRect &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
1309         {
1310                 ASSERT(!(options & CLEAR_OPERATION) || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1)));
1311
1312                 Rect dRect = destRect;
1313                 Rect sRect = sourceRect;
1314                 if(destRect.x0 > destRect.x1)
1315                 {
1316                         swap(dRect.x0, dRect.x1);
1317                         swap(sRect.x0, sRect.x1);
1318                 }
1319                 if(destRect.y0 > destRect.y1)
1320                 {
1321                         swap(dRect.y0, dRect.y1);
1322                         swap(sRect.y0, sRect.y1);
1323                 }
1324
1325                 BlitState state;
1326
1327                 bool useSourceInternal = !source->isExternalDirty();
1328                 bool useDestInternal = !dest->isExternalDirty();
1329                 bool isStencil = ((options & USE_STENCIL) == USE_STENCIL);
1330
1331                 state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal);
1332                 state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal);
1333                 state.options = options;
1334
1335                 criticalSection.lock();
1336                 Routine *blitRoutine = blitCache->query(state);
1337
1338                 if(!blitRoutine)
1339                 {
1340                         blitRoutine = generate(state);
1341
1342                         if(!blitRoutine)
1343                         {
1344                                 criticalSection.unlock();
1345                                 return false;
1346                         }
1347
1348                         blitCache->add(state, blitRoutine);
1349                 }
1350
1351                 criticalSection.unlock();
1352
1353                 void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1354
1355                 BlitData data;
1356
1357                 bool isRGBA = ((options & WRITE_RGBA) == WRITE_RGBA);
1358                 bool isEntireDest = dest->isEntire(destRect);
1359
1360                 data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) :
1361                                           source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal);
1362                 data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) :
1363                                         dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal);
1364                 data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal);
1365                 data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal);
1366
1367                 data.w = 1.0f / (dRect.x1 - dRect.x0) * (sRect.x1 - sRect.x0);
1368                 data.h = 1.0f / (dRect.y1 - dRect.y0) * (sRect.y1 - sRect.y0);
1369                 data.x0 = (float)sRect.x0 + 0.5f * data.w;
1370                 data.y0 = (float)sRect.y0 + 0.5f * data.h;
1371
1372                 data.x0d = dRect.x0;
1373                 data.x1d = dRect.x1;
1374                 data.y0d = dRect.y0;
1375                 data.y1d = dRect.y1;
1376
1377                 data.sWidth = source->getWidth();
1378                 data.sHeight = source->getHeight();
1379
1380                 blitFunction(&data);
1381
1382                 if(isStencil)
1383                 {
1384                         source->unlockStencil();
1385                         dest->unlockStencil();
1386                 }
1387                 else
1388                 {
1389                         source->unlock(useSourceInternal);
1390                         dest->unlock(useDestInternal);
1391                 }
1392
1393                 return true;
1394         }
1395 }