OSDN Git Service

Refactor the code to provide a better framework for compression techniques; also...
[android-x86/external-s2tc.git] / s2tc_algorithm.cpp
1 /*
2  * Copyright (C) 2011  Rudolf Polzer   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * RUDOLF POLZER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20  */
21 #define S2TC_LICENSE_IDENTIFIER s2tc_algorithm_license
22 #include "s2tc_license.h"
23
24 #include <math.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <algorithm>
30 #include <iostream>
31
32 #include "s2tc_algorithm.h"
33 #include "s2tc_common.h"
34
35 namespace
36 {
// Small color triple with one signed char per channel.
// The int constructor derives the channels as i >> 3, i >> 2, i >> 3, i.e. it
// maps an 8-bit gray level to 5, 6 and 5 bits respectively.
struct color_t
{
    signed char r, g, b;

    inline color_t(): r(0), g(0), b(0)
    {
    }

    inline color_t(signed char r_, signed char g_, signed char b_): r(r_), g(g_), b(b_)
    {
    }

    // Deliberately left implicit: generic encoder code in this file passes
    // plain int literals (0 and 255) where a color_t is expected.
    inline color_t(int i): r(i >> 3), g(i >> 2), b(i >> 3)
    {
    }

    // Lexicographic ordering by r, then g, then b.
    // The channels are compared directly instead of through a difference
    // stored in a signed char, so the result cannot wrap around for extreme
    // channel values; const so that it also works on const colors.
    inline bool operator<(const color_t &c) const
    {
        if(r != c.r)
            return r < c.r;
        if(g != c.g)
            return g < c.g;
        return b < c.b;
    }
};
66
67         struct bigcolor_t
68         {
69                 int r, g, b;
70
71                 inline bigcolor_t(): r(0), g(0), b(0)
72                 {
73                 }
74
75                 inline bigcolor_t &operator+=(const color_t &c)
76                 {
77                         r += c.r;
78                         g += c.g;
79                         b += c.b;
80                         return *this;
81                 }
82
83                 inline bigcolor_t &operator+=(int v)
84                 {
85                         r += v;
86                         g += v;
87                         b += v;
88                         return *this;
89                 }
90
91                 inline bigcolor_t operator+(int v)
92                 {
93                         bigcolor_t out = *this;
94                         out += v;
95                         return out;
96                 }
97
98                 inline bigcolor_t &operator/=(int v)
99                 {
100                         r /= v;
101                         g /= v;
102                         b /= v;
103                         return *this;
104                 }
105
106                 inline bigcolor_t operator/(int v)
107                 {
108                         bigcolor_t out = *this;
109                         out /= v;
110                         return out;
111                 }
112
113                 inline bigcolor_t &operator<<=(int v)
114                 {
115                         r <<= v;
116                         g <<= v;
117                         b <<= v;
118                         return *this;
119                 }
120
121                 inline bigcolor_t operator<<(int v)
122                 {
123                         bigcolor_t out = *this;
124                         out <<= v;
125                         return out;
126                 }
127
128                 inline operator color_t()
129                 {
130                         color_t out;
131                         out.r = r & 31;
132                         out.g = g & 63;
133                         out.b = b & 31;
134                         return out;
135                 }
136         };
137
138         std::ostream &operator<<(std::ostream &ost, const color_t &c)
139         {
140                 return ost << "color_t(" << int(c.r) << ", " << int(c.g) << ", " << int(c.b) << ")";
141         }
142
143         std::ostream &operator<<(std::ostream &ost, const bigcolor_t &c)
144         {
145                 return ost << "bigcolor_t(" << c.r << ", " << c.g << ", " << c.b << ")";
146         }
147
// 16 differences must fit in int
// i.e. a difference must be lower than 2^27

// shift right, rounded: adds half of the divisor, 1 << (n-1), before shifting
// right by n, so the result is rounded rather than truncated.
// n is expanded twice and must be at least 1 (otherwise the inner shift count
// would be negative).
#define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
153
154         inline int color_dist_avg(const color_t &a, const color_t &b)
155         {
156                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
157                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
158                 int db = a.b - b.b; // multiplier: 31 (-1..1)
159                 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
160         }
161
162         inline int color_dist_wavg(const color_t &a, const color_t &b)
163         {
164                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
165                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
166                 int db = a.b - b.b; // multiplier: 31 (-1..1)
167                 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
168                 // weighted 4:16:1
169         }
170
171         inline int color_dist_yuv(const color_t &a, const color_t &b)
172         {
173                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
174                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
175                 int db = a.b - b.b; // multiplier: 31 (-1..1)
176                 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
177                 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
178                 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
179                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
180                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
181                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
182         }
183
184         inline int color_dist_rgb(const color_t &a, const color_t &b)
185         {
186                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
187                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
188                 int db = a.b - b.b; // multiplier: 31 (-1..1)
189                 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
190                 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
191                 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
192                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
193                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
194                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
195         }
196
197         inline int color_dist_srgb(const color_t &a, const color_t &b)
198         {
199                 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
200                 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
201                 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
202                 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
203                 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
204                 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
205                 int sy = SHRR(y, 3) * SHRR(y, 4);
206                 int su = SHRR(u, 3) * SHRR(u, 4);
207                 int sv = SHRR(v, 3) * SHRR(v, 4);
208                 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
209                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
210                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
211         }
212
213         inline int srgb_get_y(const color_t &a)
214         {
215                 // convert to linear
216                 int r = a.r * (int) a.r;
217                 int g = a.g * (int) a.g;
218                 int b = a.b * (int) a.b;
219                 // find luminance
220                 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
221                 // square root it (!)
222                 y = sqrtf(y) + 0.5f; // now in range 0 to 3815
223                 return y;
224         }
225
226         inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
227         {
228                 // get Y
229                 int ay = srgb_get_y(a);
230                 int by = srgb_get_y(b);
231                 // get UV
232                 int au = a.r * 191 - ay;
233                 int av = a.b * 191 - ay;
234                 int bu = b.r * 191 - by;
235                 int bv = b.b * 191 - by;
236                 // get differences
237                 int y = ay - by;
238                 int u = au - bu;
239                 int v = av - bv;
240                 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
241                 // weight for u: ???
242                 // weight for v: ???
243         }
244
245         inline int color_dist_normalmap(const color_t &a, const color_t &b)
246         {
247                 float ca[3], cb[3], n;
248                 ca[0] = a.r / 31.0f * 2 - 1;
249                 ca[1] = a.g / 63.0f * 2 - 1;
250                 ca[2] = a.b / 31.0f * 2 - 1;
251                 cb[0] = b.r / 31.0f * 2 - 1;
252                 cb[1] = b.g / 63.0f * 2 - 1;
253                 cb[2] = b.b / 31.0f * 2 - 1;
254                 n = ca[0] * ca[0] + ca[1] * ca[1] + ca[2] * ca[2];
255                 if(n > 0)
256                 {
257                         n = 1.0f / sqrtf(n);
258                         ca[0] *= n;
259                         ca[1] *= n;
260                         ca[2] *= n;
261                 }
262                 n = cb[0] * cb[0] + cb[1] * cb[1] + cb[2] * cb[2];
263                 if(n > 0)
264                 {
265                         n = 1.0f / sqrtf(n);
266                         cb[0] *= n;
267                         cb[1] *= n;
268                         cb[2] *= n;
269                 }
270
271                 return
272                         100000 *
273                         (
274                                 (cb[0] - ca[0]) * (cb[0] - ca[0])
275                                 +
276                                 (cb[1] - ca[1]) * (cb[1] - ca[1])
277                                 +
278                                 (cb[2] - ca[2]) * (cb[2] - ca[2])
279                         )
280                         ;
281                 // max value: 1000 * (4 + 4 + 4) = 6000
282         }
283
// Function type of a color distance: takes two colors, returns an int.
// Used as a non-type template parameter to select the metric at compile time.
typedef int ColorDistFunc(const color_t &a, const color_t &b);
285
// Squared difference of two alpha values, computed in int.
inline int alpha_dist(unsigned char a, unsigned char b)
{
    const int diff = static_cast<int>(a) - static_cast<int>(b);
    return diff * diff;
}
290
// Chooses the pair of candidates from c[0..m) that represents the first n
// entries best, and stores that pair in c[0] and c[1].
//
// n: input count
// m: total color count (including non-counted inputs)
// m >= n
// dist: callable as dist(c[i], c[j]) returning an int distance
//
// For every candidate pair (i, j) the cost is the sum, over the n inputs, of
// the distance to the nearer of the two candidates; the first pair with the
// lowest cost wins. If m < 2 there is no pair and c is left untouched.
// Note: the distance table is a variable-length array (compiler extension).
template <class T, class F>
inline void reduce_colors_inplace(T *c, int n, int m, F dist)
{
    int i, j, k;
    int bestsum = -1;
    int besti = 0;
    int bestj = 1;
    int dists[m][n];
    // first the square: inputs against inputs (symmetric, zero diagonal)
    for(i = 0; i < n; ++i)
    {
        dists[i][i] = 0;
        for(j = i+1; j < n; ++j)
        {
            const int d = dist(c[i], c[j]);
            dists[i][j] = dists[j][i] = d;
        }
    }
    // then the box: the extra candidates against the inputs
    for(; i < m; ++i)
    {
        for(j = 0; j < n; ++j)
            dists[i][j] = dist(c[i], c[j]);
    }
    // exhaustive search over all candidate pairs
    for(i = 0; i < m; ++i)
        for(j = i+1; j < m; ++j)
        {
            int sum = 0;
            for(k = 0; k < n; ++k)
                sum += std::min(dists[i][k], dists[j][k]);
            if(bestsum < 0 || sum < bestsum)
            {
                bestsum = sum;
                besti = i;
                bestj = j;
            }
        }
    // besti < bestj, so c[bestj] is still intact after c[0] is overwritten
    if(besti != 0)
        c[0] = c[besti];
    if(bestj != 1)
        c[1] = c[bestj];
}
// Variant of reduce_colors_inplace with two additional representatives, fix0
// and fix1, that are always available: the cost of a candidate pair (i, j) is
// the sum, over the n inputs, of the distance to the nearest of c[i], c[j],
// fix0 and fix1. The first pair with the lowest cost is stored in c[0] and
// c[1]; if m < 2 there is no pair and c is left untouched.
//
// n: input count, m: total candidate count (m >= n), dist: callable returning
// an int distance.
// The distance to the nearer fixed point does not depend on the pair, so it is
// computed once per input instead of inside the pair search.
// Note: the tables are variable-length arrays (compiler extension).
template <class T, class F>
inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
{
    // TODO fix this for ramp encoding!
    int i, j, k;
    int bestsum = -1;
    int besti = 0;
    int bestj = 1;
    int dists[m][n];
    int fixed_min[n];
    // first the square: inputs against inputs (symmetric, zero diagonal)
    for(i = 0; i < n; ++i)
    {
        dists[i][i] = 0;
        for(j = i+1; j < n; ++j)
        {
            const int d = dist(c[i], c[j]);
            dists[i][j] = dists[j][i] = d;
        }
    }
    // then the box: the extra candidates against the inputs
    for(; i < m; ++i)
    {
        for(j = 0; j < n; ++j)
            dists[i][j] = dist(c[i], c[j]);
    }
    // then the two fixed points, reduced to their per-input minimum
    for(k = 0; k < n; ++k)
    {
        const int d0 = dist(fix0, c[k]);
        const int d1 = dist(fix1, c[k]);
        fixed_min[k] = std::min(d0, d1);
    }
    // exhaustive search over all candidate pairs
    for(i = 0; i < m; ++i)
        for(j = i+1; j < m; ++j)
        {
            int sum = 0;
            for(k = 0; k < n; ++k)
                sum += std::min(std::min(dists[i][k], dists[j][k]), fixed_min[k]);
            if(bestsum < 0 || sum < bestsum)
            {
                bestsum = sum;
                besti = i;
                bestj = j;
            }
        }
    // besti < bestj, so c[bestj] is still intact after c[0] is overwritten
    if(besti != 0)
        c[0] = c[besti];
    if(bestj != 1)
        c[1] = c[bestj];
}
408
// Compile-time selector for how s2tc_encode_block chooses its base colors.
// NOTE(review): only the MODE_FAST path is visible next to this definition;
// what MODE_NORMAL does should be confirmed in s2tc_encode_block.
enum CompressionMode
{
        MODE_NORMAL,
        MODE_FAST // takes the brightest and darkest colors of the block
};
414
415         template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
416         {
417                 return comp;
418         }
419         template<> inline int refine_component_encode<color_dist_srgb>(int comp)
420         {
421                 return comp * comp;
422         }
423         template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
424         {
425                 return comp * comp;
426         }
427
428         template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
429         {
430                 return comp;
431         }
432         template<> inline int refine_component_decode<color_dist_srgb>(int comp)
433         {
434                 return sqrtf(comp) + 0.5f;
435         }
436         template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
437         {
438                 return sqrtf(comp) + 0.5f;
439         }
440
// Collects the values assigned to each of the two endpoints of a block and
// turns them into refined endpoints (the rounded mean of each group).
// scale_l selects the implementation; only scale_l == 1 is implemented.
template <class T, class Big, int scale_l>
struct s2tc_evaluate_colors_result_t;

// scale_l == 1: plain per-endpoint averaging.
// Requirements on Big:
//   Big << int
//   Big / int
//   Big + int
//   Big += T
// and the result of Big / int must be assignable to T.
template <class T, class Big>
struct s2tc_evaluate_colors_result_t<T, Big, 1>
{
    int n0, n1; // number of values recorded for endpoint 0 / endpoint 1
    Big S0, S1; // sum of the values recorded for endpoint 0 / endpoint 1

    inline s2tc_evaluate_colors_result_t():
        n0(), n1(), S0(), S1()
    {
    }

    // Records value a for endpoint 0 when l is zero, for endpoint 1 otherwise.
    inline void add(int l, T a)
    {
        if(l == 0)
        {
            ++n0;
            S0 += a;
            return;
        }
        ++n1;
        S1 += a;
    }

    // Writes the rounded mean of each non-empty group to a (endpoint 0) and
    // b (endpoint 1); an endpoint without recorded values is left unchanged.
    // Returns false, changing nothing, when no value was recorded at all.
    inline bool evaluate(T &a, T &b)
    {
        if(n0 == 0 && n1 == 0)
            return false;
        // (2*S + n) / (2*n) is S / n rounded to nearest
        if(n0 != 0)
            a = ((S0 << 1) + n0) / (n0 << 1);
        if(n1 != 0)
            b = ((S1 << 1) + n1) / (n1 << 1);
        return true;
    }
};

template <class T, class Big, int scale_l>
struct s2tc_evaluate_colors_result_t
{
    // placeholder: refinement of inferred (interpolated) color/alpha values
    // could be implemented here
};
489
// Drop-in replacement for s2tc_evaluate_colors_result_t when no refinement is
// wanted: add() accepts the same arguments and discards them.
template <class T>
struct s2tc_evaluate_colors_result_null_t
{
    inline void add(int l, T a)
    {
        (void) l;
        (void) a;
    }
};
497
// Reads one value of type T from a 4-byte pixel.
// Generic version: fills the r, g and b members of T from bytes 0, 1 and 2.
template<class T> T get(const unsigned char *buf)
{
    T pixel;
    pixel.r = buf[0];
    pixel.g = buf[1];
    pixel.b = buf[2];
    return pixel;
}
// unsigned char version: returns byte 3, the alpha value.
template<> unsigned char get<unsigned char>(const unsigned char *buf)
{
    const unsigned char alpha = buf[3];
    return alpha;
}
510
511         template<class T, class Big, int bpp, bool have_trans, bool have_0_255, int n_input, class Dist, class Eval, class Arr>
512         inline unsigned int s2tc_try_encode_block(
513                         Arr &out,
514                         Eval &res,
515                         Dist ColorDist,
516                         const unsigned char *in, int iw, int w, int h,
517                         const T colors_ref[])
518         {
519                 unsigned int score = 0;
520                 for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
521                 {
522                         int i = y * 4 + x;
523                         const unsigned char *pix = &in[(y * iw + x) * 4];
524
525                         if(have_trans)
526                         {
527                                 if(pix[3] == 0)
528                                 {
529                                         out.do_or(i, (1 << bpp) - 1);
530                                         continue;
531                                 }
532                         }
533
534                         T color(get<T>(pix));
535                         int best = 0;
536                         int bestdist = ColorDist(color, colors_ref[0]);
537                         for(int k = 1; k < n_input; ++k)
538                         {
539                                 int dist = ColorDist(color, colors_ref[k]);
540                                 if(dist < bestdist)
541                                 {
542                                         bestdist = dist;
543                                         best = k;
544                                 }
545                         }
546                         if(have_0_255)
547                         {
548                                 int dist_0 = ColorDist(color, 0);
549                                 if(dist_0 <= bestdist)
550                                 {
551                                         bestdist = dist_0;
552                                         out.do_or(i, (1 << bpp) - 2);
553                                         score += bestdist;
554                                         continue;
555                                 }
556                                 int dist_255 = ColorDist(color, 255);
557                                 if(dist_255 <= bestdist)
558                                 {
559                                         bestdist = dist_255;
560                                         out.do_or(i, (1 << bpp) - 1);
561                                         score += bestdist;
562                                         continue;
563                                 }
564                         }
565
566                         // record
567                         res.add(best, color);
568                         out.do_or(i, best);
569                         score += bestdist;
570                 }
571                 return score;
572         }
573
574         // REFINE_LOOP: refine, take result over only if score improved, loop until it did not
575         inline void s2tc_dxt5_encode_alpha_refine_loop(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
576         {
577                 bitarray<uint64_t, 16, 3> out2;
578                 unsigned char a0next = a0, a1next = a1;
579                 unsigned int s = 0x7FFFFFFF;
580                 for(;;)
581                 {
582                         unsigned char ramp[2] = {
583                                 a0next,
584                                 a1next
585                         };
586                         s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
587                         unsigned int s2 = s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out2, r2, alpha_dist, in, iw, w, h, ramp);
588                         if(s2 < s)
589                         {
590                                 out = out2;
591                                 s = s2;
592                                 a0 = a0next;
593                                 a1 = a1next;
594                                 if(!r2.evaluate(a0next, a1next))
595                                         break;
596                         }
597                         else
598                                 break;
599                         out2.clear();
600                 }
601                 if(a1 < a0)
602                 {
603                         std::swap(a0, a1);
604                         for(int i = 0; i < 16; ++i) switch(out.get(i))
605                         {
606                                 case 0:
607                                         out.set(i, 1);
608                                         break;
609                                 case 1:
610                                         out.set(i, 0);
611                                         break;
612                                 case 6:
613                                 case 7:
614                                         break;
615                                 default:
616                                         out.set(i, 7 - out.get(i));
617                                         break;
618                         }
619                 }
620         }
621
622         // REFINE_ALWAYS: refine, do not check
623         inline void s2tc_dxt5_encode_alpha_refine_always(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
624         {
625                 unsigned char ramp[2] = {
626                         a0,
627                         a1
628                 };
629                 s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
630                 s2tc_try_encode_block<unsigned char, int, 3, false, true, 6>(out, r2, alpha_dist, in, iw, w, h, ramp);
631                 r2.evaluate(a0, a1);
632
633                 if(a1 < a0)
634                 {
635                         std::swap(a0, a1);
636                         for(int i = 0; i < 16; ++i) switch(out.get(i))
637                         {
638                                 case 0:
639                                         out.set(i, 1);
640                                         break;
641                                 case 1:
642                                         out.set(i, 0);
643                                         break;
644                                 case 6:
645                                 case 7:
646                                         break;
647                                 default:
648                                         out.set(i, 7 - out.get(i));
649                                         break;
650                         }
651                 }
652         }
653
654         // REFINE_NEVER: do not refine
655         inline void s2tc_dxt5_encode_alpha_refine_never(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
656         {
657                 if(a1 < a0)
658                         std::swap(a0, a1);
659                 unsigned char ramp[6] = {
660                         a0,
661                         a1
662                 };
663                 s2tc_evaluate_colors_result_null_t<unsigned char> r2;
664                 s2tc_try_encode_block<unsigned char, int, 3, false, true, 6>(out, r2, alpha_dist, in, iw, w, h, ramp);
665         }
666
667         // REFINE_LOOP: refine, take result over only if score improved, loop until it did not
668         template<ColorDistFunc ColorDist, bool have_trans>
669         inline void s2tc_dxt1_encode_color_refine_loop(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
670         {
671                 bitarray<uint32_t, 16, 2> out2;
672                 color_t c0next = c0, c1next = c1;
673                 unsigned int s = 0x7FFFFFFF;
674                 for(;;)
675                 {
676                         color_t ramp[2] = {
677                                 c0next,
678                                 c1next
679                         };
680                         s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
681                         unsigned int s2 = s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out2, r2, ColorDist, in, iw, w, h, ramp);
682                         if(s2 < s)
683                         {
684                                 out = out2;
685                                 s = s2;
686                                 c0 = c0next;
687                                 c1 = c1next;
688                                 if(!r2.evaluate(c0next, c1next))
689                                         break;
690                         }
691                         else
692                                 break;
693                         out2.clear();
694                 }
695                 if(have_trans ? c1 < c0 : c0 < c1)
696                 {
697                         std::swap(c0, c1);
698                         for(int i = 0; i < 16; ++i)
699                                 if(!(out.get(i) & 2))
700                                         out.do_xor(i, 1);
701                 }
702         }
703
704         // REFINE_ALWAYS: refine, do not check
705         template<ColorDistFunc ColorDist, bool have_trans>
706         inline void s2tc_dxt1_encode_color_refine_always(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
707         {
708                 color_t ramp[2] = {
709                         c0,
710                         c1
711                 };
712                 s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
713                 s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
714                 r2.evaluate(c0, c1);
715                 if(have_trans ? c1 < c0 : c0 < c1)
716                 {
717                         std::swap(c0, c1);
718                         for(int i = 0; i < 16; ++i)
719                                 if(!(out.get(i) & 2))
720                                         out.do_xor(i, 1);
721                 }
722         }
723
724         // REFINE_NEVER: do not refine
725         template<ColorDistFunc ColorDist, bool have_trans>
726         inline void s2tc_dxt1_encode_color_refine_never(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
727         {
728                 if(have_trans ? c1 < c0 : c0 < c1)
729                         std::swap(c0, c1);
730                 color_t ramp[2] = {
731                         c0,
732                         c1
733                 };
734                 s2tc_evaluate_colors_result_null_t<color_t> r2;
735                 s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
736         }
737
738         inline void s2tc_dxt3_encode_alpha(bitarray<uint64_t, 16, 4> &out, const unsigned char *in, int iw, int w, int h)
739         {
740                 for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
741                 {
742                         int i = y * 4 + x;
743                         const unsigned char *pix = &in[(y * iw + x) * 4];
744                         out.do_or(i, pix[3]);
745                 }
746         }
747
748         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
749         inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
750         {
751                 color_t c[16 + (nrandom >= 0 ? nrandom : 0)];
752                 unsigned char ca[16 + (nrandom >= 0 ? nrandom : 0)];
753                 int n = 0, m = 0;
754                 int x, y;
755
756                 if(mode == MODE_FAST)
757                 {
758                         // FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these
759
760                         color_t c0(0);
761
762                         // dummy values because we don't know whether the first pixel willw rite
763                         c[0].r = 31;
764                         c[0].g = 63;
765                         c[0].b = 31;
766                         c[1].r = 0;
767                         c[1].g = 0;
768                         c[1].b = 0;
769                         int dmin = 0x7FFFFFFF;
770                         int dmax = 0;
771                         if(dxt == DXT5)
772                         {
773                                 ca[0] = rgba[3];
774                                 ca[1] = ca[0];
775                         }
776
777                         for(x = 0; x < w; ++x)
778                                 for(y = 0; y < h; ++y)
779                                 {
780                                         c[2].r = rgba[(x + y * iw) * 4 + 0];
781                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
782                                         c[2].b = rgba[(x + y * iw) * 4 + 2];
783                                         ca[2]  = rgba[(x + y * iw) * 4 + 3];
784                                         // MODE_FAST doesn't work for normalmaps, so this works
785                                         if(!ca[2])
786                                                 continue;
787
788                                         int d = ColorDist(c[2], c0);
789                                         if(d > dmax)
790                                         {
791                                                 dmax = d;
792                                                 c[1] = c[2];
793                                         }
794                                         if(d < dmin)
795                                         {
796                                                 dmin = d;
797                                                 c[0] = c[2];
798                                         }
799
800                                         if(dxt == DXT5)
801                                         {
802                                                 if(ca[2] != 255)
803                                                 {
804                                                         if(ca[2] > ca[1])
805                                                                 ca[1] = ca[2];
806                                                         if(ca[2] < ca[0])
807                                                                 ca[0] = ca[2];
808                                                 }
809                                         }
810                                 }
811
812                         // if ALL pixels were transparent, this won't stop us
813
814                         m = n = 2;
815                 }
816                 else
817                 {
818                         for(x = 0; x < w; ++x)
819                                 for(y = 0; y < h; ++y)
820                                 {
821                                         c[n].r = rgba[(x + y * iw) * 4 + 0];
822                                         c[n].g = rgba[(x + y * iw) * 4 + 1];
823                                         c[n].b = rgba[(x + y * iw) * 4 + 2];
824                                         ca[n]  = rgba[(x + y * iw) * 4 + 3];
825                                         ++n;
826                                 }
827                         if(n == 0)
828                         {
829                                 n = 1;
830                                 c[0].r = 0;
831                                 c[0].g = 0;
832                                 c[0].b = 0;
833                                 ca[0] = 0;
834                         }
835                         m = n;
836
837                         if(nrandom > 0)
838                         {
839                                 color_t mins = c[0];
840                                 color_t maxs = c[0];
841                                 unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
842                                 unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
843                                 for(x = 1; x < n; ++x)
844                                 {
845                                         mins.r = min(mins.r, c[x].r);
846                                         mins.g = min(mins.g, c[x].g);
847                                         mins.b = min(mins.b, c[x].b);
848                                         maxs.r = max(maxs.r, c[x].r);
849                                         maxs.g = max(maxs.g, c[x].g);
850                                         maxs.b = max(maxs.b, c[x].b);
851                                         if(dxt == DXT5)
852                                         {
853                                                 mina = min(mina, ca[x]);
854                                                 maxa = max(maxa, ca[x]);
855                                         }
856                                 }
857                                 color_t len(maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1);
858                                 int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
859                                 for(x = 0; x < nrandom; ++x)
860                                 {
861                                         c[m].r = mins.r + rand() % len.r;
862                                         c[m].g = mins.g + rand() % len.g;
863                                         c[m].b = mins.b + rand() % len.b;
864                                         if(dxt == DXT5)
865                                                 ca[m] = mina + rand() % lena;
866                                         ++m;
867                                 }
868                         }
869                         else
870                         {
871                                 // hack for last miplevel
872                                 if(n == 1)
873                                 {
874                                         c[1] = c[0];
875                                         m = n = 2;
876                                 }
877                         }
878
879                         reduce_colors_inplace(c, n, m, ColorDist);
880                         if(dxt == DXT5)
881                                 reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
882                 }
883
884                 switch(dxt)
885                 {
886                         case DXT1:
887                                 {
888                                         bitarray<uint32_t, 16, 2> colorblock;
889                                         switch(refine)
890                                         {
891                                                 case REFINE_NEVER:
892                                                         s2tc_dxt1_encode_color_refine_never<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
893                                                         break;
894                                                 case REFINE_ALWAYS:
895                                                         s2tc_dxt1_encode_color_refine_always<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
896                                                         break;
897                                                 case REFINE_LOOP:
898                                                         s2tc_dxt1_encode_color_refine_loop<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
899                                                         break;
900                                         }
901                                         out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
902                                         out[1] = (c[0].r << 3) | (c[0].g >> 3);
903                                         out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
904                                         out[3] = (c[1].r << 3) | (c[1].g >> 3);
905                                         colorblock.tobytes(&out[4]);
906                                 }
907                                 break;
908                         case DXT3:
909                                 {
910                                         bitarray<uint32_t, 16, 2> colorblock;
911                                         bitarray<uint64_t, 16, 4> alphablock;
912                                         switch(refine)
913                                         {
914                                                 case REFINE_NEVER:
915                                                         s2tc_dxt1_encode_color_refine_never<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
916                                                         break;
917                                                 case REFINE_ALWAYS:
918                                                         s2tc_dxt1_encode_color_refine_always<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
919                                                         break;
920                                                 case REFINE_LOOP:
921                                                         s2tc_dxt1_encode_color_refine_loop<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
922                                                         break;
923                                         }
924                                         s2tc_dxt3_encode_alpha(alphablock, rgba, iw, w, h);
925                                         alphablock.tobytes(&out[0]);
926                                         out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
927                                         out[9] = (c[0].r << 3) | (c[0].g >> 3);
928                                         out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
929                                         out[11] = (c[1].r << 3) | (c[1].g >> 3);
930                                         colorblock.tobytes(&out[12]);
931                                 }
932                                 break;
933                         case DXT5:
934                                 {
935                                         bitarray<uint32_t, 16, 2> colorblock;
936                                         bitarray<uint64_t, 16, 3> alphablock;
937                                         switch(refine)
938                                         {
939                                                 case REFINE_NEVER:
940                                                         s2tc_dxt1_encode_color_refine_never<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
941                                                         s2tc_dxt5_encode_alpha_refine_never(alphablock, rgba, iw, w, h, ca[0], ca[1]);
942                                                         break;
943                                                 case REFINE_ALWAYS:
944                                                         s2tc_dxt1_encode_color_refine_always<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
945                                                         s2tc_dxt5_encode_alpha_refine_always(alphablock, rgba, iw, w, h, ca[0], ca[1]);
946                                                         break;
947                                                 case REFINE_LOOP:
948                                                         s2tc_dxt1_encode_color_refine_loop<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
949                                                         s2tc_dxt5_encode_alpha_refine_loop(alphablock, rgba, iw, w, h, ca[0], ca[1]);
950                                                         break;
951                                         }
952                                         out[0] = ca[0];
953                                         out[1] = ca[1];
954                                         alphablock.tobytes(&out[2]);
955                                         out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
956                                         out[9] = (c[0].r << 3) | (c[0].g >> 3);
957                                         out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
958                                         out[11] = (c[1].r << 3) | (c[1].g >> 3);
959                                         colorblock.tobytes(&out[12]);
960                                 }
961                                 break;
962                 }
963         }
964
965         // compile time dispatch magic
966         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
967         inline s2tc_encode_block_func_t s2tc_encode_block_func(RefinementMode refine)
968         {
969                 switch(refine)
970                 {
971                         case REFINE_NEVER:
972                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_NEVER>;
973                         case REFINE_LOOP:
974                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_LOOP>;
975                         default:
976                         case REFINE_ALWAYS:
977                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_ALWAYS>;
978                 }
979         }
980
981         // these color dist functions do not need the refinement check, as they always improve the situation
982         template<ColorDistFunc ColorDist> struct supports_fast
983         {
984                 static const bool value = true;
985         };
986         template<> struct supports_fast<color_dist_normalmap>
987         {
988                 static const bool value = false;
989         };
990
991         template<DxtMode dxt, ColorDistFunc ColorDist>
992         inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, RefinementMode refine)
993         {
994                 if(!supports_fast<ColorDist>::value  || nrandom >= 0)
995                         return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
996                 else
997                         return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
998         }
999
1000         template<ColorDistFunc ColorDist>
1001         inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, RefinementMode refine)
1002         {
1003                 switch(dxt)
1004                 {
1005                         case DXT1:
1006                                 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
1007                                 break;
1008                         case DXT3:
1009                                 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
1010                                 break;
1011                         default:
1012                         case DXT5:
1013                                 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
1014                                 break;
1015                 }
1016         }
1017 };
1018
1019 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, RefinementMode refine)
1020 {
1021         switch(cd)
1022         {
1023                 case RGB:
1024                         return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
1025                         break;
1026                 case YUV:
1027                         return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
1028                         break;
1029                 case SRGB:
1030                         return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
1031                         break;
1032                 case SRGB_MIXED:
1033                         return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
1034                         break;
1035                 case AVG:
1036                         return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
1037                         break;
1038                 default:
1039                 case WAVG:
1040                         return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
1041                         break;
1042                 case NORMALMAP:
1043                         return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
1044                         break;
1045         }
1046 }
1047
1048 namespace
1049 {
// Quantize one channel value by dropping its low `shift` bits, carrying the
// quantization error over from call to call.
//   diff  - in: error left by the previous call, out: error left by this one
//   src   - channel value to quantize
//   shift - number of low bits to drop
// Returns the quantized value, limited to 0 .. (1 << (8 - shift)) - 1.
inline int diffuse(int *diff, int src, int shift)
{
    const int top = (1 << (8 - shift)) - 1;
    const int wanted = src + *diff;

    int q = wanted >> shift;
    if(q > top)
        q = top;
    if(q < 0)
        q = 0;

    // simulate decoding ("loop filter"): the value q expands back to, with
    // its high bits repeated in the low bits
    const int decoded = (q << shift) | (q >> (8 - 2 * shift));
    *diff = wanted - decoded;
    return q;
}
// One bit variant of diffuse(): the value plus the carried error becomes 1
// when it reaches 128 and 0 otherwise.
//   diff - in: error left by the previous call, out: error left by this one
//   src  - channel value to quantize
// Returns 0 or 1.
inline int diffuse1(int *diff, int src)
{
    const int wanted = src + *diff;
    if(wanted >= 128)
    {
        // simulate decoding ("loop filter"): a set bit decodes to 255
        *diff = wanted - 255;
        return 1;
    }
    // a cleared bit decodes to 0, so the whole value is carried over
    *diff = wanted;
    return 0;
}
1069
// Quantize one channel value by dropping its low `shift` bits and spread
// the quantization error over four neighbouring error slots.
//   thisrow - error slots of the current row; thisrow[1] belongs to this
//             pixel and is read, thisrow[2] (the next pixel) is added to
//   downrow - error slots of the row below; downrow[0..2] are added to
//   src     - channel value to quantize
//   shift   - number of low bits to drop
// The error slots work on a 12 bit scale (0..4095).
// Returns the quantized value, limited to 0 .. (1 << (8 - shift)) - 1.
inline int floyd(int *thisrow, int *downrow, int src, int shift)
{
    const int top = (1 << (8 - shift)) - 1;

    // widen the value to 12 bits, then add the error collected for this pixel
    const int wanted = ((src << 4) | (src >> 4)) + thisrow[1];

    int q = wanted >> (shift + 4);
    if(q > top)
        q = top;
    if(q < 0)
        q = 0;

    // simulate decoding ("loop filter") and split what is left into four
    // parts; each part is taken from the remainder of the previous ones
    int rest = wanted - (q * 4095 / top);
    const int right = (rest * 7 + 8) / 16;
    rest -= right;
    const int below_left = (rest * 3 + 4) / 9;
    rest -= below_left;
    const int below = (rest * 5 + 3) / 6;
    rest -= below;

    thisrow[2] += right;
    downrow[0] += below_left;
    downrow[1] += below;
    downrow[2] += rest;
    return q;
}
1092
// One bit variant of floyd(): the value plus the collected error becomes 1
// when it reaches 2048 on the 12 bit scale and 0 otherwise; the error is
// spread over the same four slots as in floyd().
//   thisrow - error slots of the current row; thisrow[1] is read,
//             thisrow[2] is added to
//   downrow - error slots of the row below; downrow[0..2] are added to
//   src     - channel value to quantize
// Returns 0 or 1.
inline int floyd1(int *thisrow, int *downrow, int src)
{
    // widen the value to 12 bits, then add the error collected for this pixel
    const int wanted = ((src << 4) | (src >> 4)) + thisrow[1];
    const int bit = (wanted >= 2048);

    // simulate decoding ("loop filter"): a set bit decodes to full scale
    int rest = bit ? wanted - 4095 : wanted;
    const int right = (rest * 7 + 8) / 16;
    rest -= right;
    const int below_left = (rest * 3 + 4) / 9;
    rest -= below_left;
    const int below = (rest * 5 + 3) / 6;
    rest -= below;

    thisrow[2] += right;
    downrow[0] += below_left;
    downrow[1] += below;
    downrow[2] += rest;
    return bit;
}
1114
1115         template<int srccomps, int alphabits, DitherMode dither>
1116         inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h)
1117         {
1118                 int x, y;
1119                 switch(dither)
1120                 {
1121                         case DITHER_NONE:
1122                                 {
1123                                         for(y = 0; y < h; ++y)
1124                                                 for(x = 0; x < w; ++x)
1125                                                 {
1126                                                         out[(x + y * w) * 4 + 0] = rgba[(x + y * w) * srccomps + 0] >> 3;
1127                                                         out[(x + y * w) * 4 + 1] = rgba[(x + y * w) * srccomps + 1] >> 2;
1128                                                         out[(x + y * w) * 4 + 2] = rgba[(x + y * w) * srccomps + 2] >> 3;
1129                                                 }
1130                                         if(srccomps == 4)
1131                                         {
1132                                                 if(alphabits == 1)
1133                                                 {
1134                                                         for(y = 0; y < h; ++y)
1135                                                                 for(x = 0; x < w; ++x)
1136                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> 7;
1137                                                 }
1138                                                 else if(alphabits == 8)
1139                                                 {
1140                                                         for(y = 0; y < h; ++y)
1141                                                                 for(x = 0; x < w; ++x)
1142                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1143                                                 }
1144                                                 else
1145                                                 {
1146                                                         int alphadiffuse = 8 - alphabits;
1147                                                         for(y = 0; y < h; ++y)
1148                                                                 for(x = 0; x < w; ++x)
1149                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> (8 - alphabits);
1150                                                 }
1151                                         }
1152                                         else
1153                                         {
1154                                                 for(y = 0; y < h; ++y)
1155                                                         for(x = 0; x < w; ++x)
1156                                                                 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1157                                         }
1158                                 }
1159                                 break;
1160                         case DITHER_SIMPLE:
1161                                 {
1162                                         int x, y;
1163                                         int diffuse_r = 0;
1164                                         int diffuse_g = 0;
1165                                         int diffuse_b = 0;
1166                                         int diffuse_a = 0;
1167                                         for(y = 0; y < h; ++y)
1168                                                 for(x = 0; x < w; ++x)
1169                                                 {
1170                                                         out[(x + y * w) * 4 + 0] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
1171                                                         out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1172                                                         out[(x + y * w) * 4 + 2] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
1173                                                 }
1174                                         if(srccomps == 4)
1175                                         {
1176                                                 if(alphabits == 1)
1177                                                 {
1178                                                         for(y = 0; y < h; ++y)
1179                                                                 for(x = 0; x < w; ++x)
1180                                                                         out[(x + y * w) * 4 + 3] = diffuse1(&diffuse_a, rgba[(x + y * w) * srccomps + 3]);
1181                                                 }
1182                                                 else if(alphabits == 8)
1183                                                 {
1184                                                         for(y = 0; y < h; ++y)
1185                                                                 for(x = 0; x < w; ++x)
1186                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1187                                                 }
1188                                                 else
1189                                                 {
1190                                                         for(y = 0; y < h; ++y)
1191                                                                 for(x = 0; x < w; ++x)
1192                                                                         out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1193                                                 }
1194                                         }
1195                                         else
1196                                         {
1197                                                 for(y = 0; y < h; ++y)
1198                                                         for(x = 0; x < w; ++x)
1199                                                                 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1200                                         }
1201                                 }
1202                                 break;
1203                         case DITHER_FLOYDSTEINBERG:
1204                                 {
1205                                         int x, y;
1206                                         int pw = w+2;
1207                                         int downrow[6*pw];
1208                                         memset(downrow, 0, sizeof(downrow));
1209                                         int *thisrow_r, *thisrow_g, *thisrow_b, *thisrow_a;
1210                                         int *downrow_r, *downrow_g, *downrow_b, *downrow_a;
1211                                         for(y = 0; y < h; ++y)
1212                                         {
1213                                                 thisrow_r = downrow + ((y&1)?3:0) * pw;
1214                                                 downrow_r = downrow + ((y&1)?0:3) * pw;
1215                                                 memset(downrow_r, 0, sizeof(*downrow_r) * (3*pw));
1216                                                 thisrow_g = thisrow_r + pw;
1217                                                 thisrow_b = thisrow_g + pw;
1218                                                 downrow_g = downrow_r + pw;
1219                                                 downrow_b = downrow_g + pw;
1220                                                 for(x = 0; x < w; ++x)
1221                                                 {
1222                                                         out[(x + y * w) * 4 + 0] = floyd(&thisrow_r[x], &downrow_r[x], rgba[(x + y * w) * srccomps + 0], 3);
1223                                                         out[(x + y * w) * 4 + 1] = floyd(&thisrow_g[x], &downrow_g[x], rgba[(x + y * w) * srccomps + 1], 2);
1224                                                         out[(x + y * w) * 4 + 2] = floyd(&thisrow_b[x], &downrow_b[x], rgba[(x + y * w) * srccomps + 2], 3);
1225                                                 }
1226                                         }
1227                                         if(srccomps == 4)
1228                                         {
1229                                                 if(alphabits == 1)
1230                                                 {
1231                                                         for(y = 0; y < h; ++y)
1232                                                         {
1233                                                                 thisrow_a = downrow + (y&1) * pw;
1234                                                                 downrow_a = downrow + !(y&1) * pw;
1235                                                                 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1236                                                                 for(x = 0; x < w; ++x)
1237                                                                         out[(x + y * w) * 4 + 3] = floyd1(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3]);
1238                                                         }
1239                                                 }
1240                                                 else if(alphabits == 8)
1241                                                 {
1242                                                         for(y = 0; y < h; ++y)
1243                                                                 for(x = 0; x < w; ++x)
1244                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1245                                                 }
1246                                                 else
1247                                                 {
1248                                                         for(y = 0; y < h; ++y)
1249                                                         {
1250                                                                 thisrow_a = downrow + (y&1) * pw;
1251                                                                 downrow_a = downrow + !(y&1) * pw;
1252                                                                 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1253                                                                 for(x = 0; x < w; ++x)
1254                                                                         out[(x + y * w) * 4 + 3] = floyd(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1255                                                         }
1256                                                 }
1257                                         }
1258                                         else
1259                                         {
1260                                                 for(y = 0; y < h; ++y)
1261                                                         for(x = 0; x < w; ++x)
1262                                                                 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1263                                         }
1264                                 }
1265                                 break;
1266                 }
1267         }
1268
1269         template<int srccomps, int alphabits>
1270         inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, DitherMode dither)
1271         {
1272                 switch(dither)
1273                 {
1274                         case DITHER_NONE:
1275                                 rgb565_image<srccomps, alphabits, DITHER_NONE>(out, rgba, w, h);
1276                                 break;
1277                         default:
1278                         case DITHER_SIMPLE:
1279                                 rgb565_image<srccomps, alphabits, DITHER_SIMPLE>(out, rgba, w, h);
1280                                 break;
1281                         case DITHER_FLOYDSTEINBERG:
1282                                 rgb565_image<srccomps, alphabits, DITHER_FLOYDSTEINBERG>(out, rgba, w, h);
1283                                 break;
1284                 }
1285         }
1286
1287         template<int srccomps>
1288         inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int alphabits, DitherMode dither)
1289         {
1290                 switch(alphabits)
1291                 {
1292                         case 1:
1293                                 rgb565_image<srccomps, 1>(out, rgba, w, h, dither);
1294                                 break;
1295                         case 4:
1296                                 rgb565_image<srccomps, 4>(out, rgba, w, h, dither);
1297                                 break;
1298                         default:
1299                         case 8:
1300                                 rgb565_image<srccomps, 8>(out, rgba, w, h, dither);
1301                                 break;
1302                 }
1303         }
1304 };
1305
1306 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int alphabits, DitherMode dither)
1307 {
1308         switch(srccomps)
1309         {
1310                 case 3:
1311                         rgb565_image<3>(out, rgba, w, h, alphabits, dither);
1312                 case 4:
1313                 default:
1314                         rgb565_image<4>(out, rgba, w, h, alphabits, dither);
1315         }
1316 }