OSDN Git Service

Don't try to link to libdl on non-Linux systems
[android-x86/external-s2tc.git] / s2tc_algorithm.cpp
1 /*
2  * Copyright (C) 2011  Rudolf Polzer   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * RUDOLF POLZER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20  */
21 #define S2TC_LICENSE_IDENTIFIER s2tc_algorithm_license
22 #include "s2tc_license.h"
23
24 #include <math.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <algorithm>
30 #include <iostream>
31
32 #include "s2tc_algorithm.h"
33 #include "s2tc_common.h"
34
35 namespace
36 {
// Per-type value limits. Specialisations expose the smallest and largest
// value of a sample type as min_value / max_value; the primary template is
// deliberately empty, so only specialised types offer the two constants.
template<class T> struct color_type_info
{
};

// Limits for 8-bit samples (the type used for alpha values in this file).
template<> struct color_type_info<unsigned char>
{
        static const unsigned char min_value = 0x00;
        static const unsigned char max_value = 0xFF;
};
45
// Plain RGB triple. The helpers in this file keep red and blue within
// 0..31 and green within 0..63 (see color_type_info<color_t> and the
// increment/decrement operators).
struct color_t
{
        signed char r;
        signed char g;
        signed char b;
};
50         inline color_t make_color_t()
51         {
52                 return (color_t) {0, 0, 0};
53         }
54         inline color_t make_color_t(signed char r_, signed char g_, signed char b_)
55         {
56                 return (color_t) {r_, g_, b_};
57         }
58         inline color_t make_color_t(int i)
59         {
60                 return (color_t) {(signed char)(i >> 3), (signed char)(i >> 2), (signed char)(i >> 3)};
61         }
62         inline bool operator==(const color_t &a, const color_t &b)
63         {
64                 return a.r == b.r && a.g == b.g && a.b == b.b;
65         }
66         inline bool operator<(const color_t &a, const color_t &b)
67         {
68                 signed char d;
69                 d = a.r - b.r;
70                 if(d)
71                         return d < 0;
72                 d = a.g - b.g;
73                 if(d)
74                         return d < 0;
75                 d = a.b - b.b;
76                 return d < 0;
77         }
78         inline color_t &operator--(color_t &c)
79         {
80                 if(c.b > 0)
81                 {
82                         --c.b;
83                 }
84                 else if(c.g > 0)
85                 {
86                         c.b = 31;
87                         --c.g;
88                 }
89                 else if(c.r > 0)
90                 {
91                         c.b = 31;
92                         c.g = 63;
93                         --c.r;
94                 }
95                 else
96                 {
97                         c.b = 31;
98                         c.g = 63;
99                         c.r = 31;
100                 }
101                 return c;
102         }
103         inline color_t &operator++(color_t &c)
104         {
105                 if(c.b < 31)
106                 {
107                         ++c.b;
108                 }
109                 else if(c.g < 63)
110                 {
111                         c.b = 0;
112                         ++c.g;
113                 }
114                 else if(c.r < 31)
115                 {
116                         c.b = 0;
117                         c.g = 0;
118                         ++c.r;
119                 }
120                 else
121                 {
122                         c.b = 0;
123                         c.g = 0;
124                         c.r = 0;
125                 }
126                 return c;
127         }
128         template<> struct color_type_info<color_t>
129         {
130                 static const color_t min_value;
131                 static const color_t max_value;
132         };
133         const color_t color_type_info<color_t>::min_value = { 0, 0, 0 };
134         const color_t color_type_info<color_t>::max_value = { 31, 63, 31 };
135
136         struct bigcolor_t
137         {
138                 int r, g, b;
139
140                 inline bigcolor_t(): r(0), g(0), b(0)
141                 {
142                 }
143
144                 inline bigcolor_t &operator+=(const color_t &c)
145                 {
146                         r += c.r;
147                         g += c.g;
148                         b += c.b;
149                         return *this;
150                 }
151
152                 inline bigcolor_t &operator+=(int v)
153                 {
154                         r += v;
155                         g += v;
156                         b += v;
157                         return *this;
158                 }
159
160                 inline bigcolor_t operator+(int v)
161                 {
162                         bigcolor_t out = *this;
163                         out += v;
164                         return out;
165                 }
166
167                 inline bigcolor_t &operator/=(int v)
168                 {
169                         r /= v;
170                         g /= v;
171                         b /= v;
172                         return *this;
173                 }
174
175                 inline bigcolor_t operator/(int v)
176                 {
177                         bigcolor_t out = *this;
178                         out /= v;
179                         return out;
180                 }
181
182                 inline bigcolor_t &operator<<=(int v)
183                 {
184                         r <<= v;
185                         g <<= v;
186                         b <<= v;
187                         return *this;
188                 }
189
190                 inline bigcolor_t operator<<(int v)
191                 {
192                         bigcolor_t out = *this;
193                         out <<= v;
194                         return out;
195                 }
196
197                 inline operator color_t()
198                 {
199                         color_t out;
200                         out.r = r & 31;
201                         out.g = g & 63;
202                         out.b = b & 31;
203                         return out;
204                 }
205         };
206
207         std::ostream &operator<<(std::ostream &ost, const color_t &c)
208         {
209                 return ost << "make_color_t(" << int(c.r) << ", " << int(c.g) << ", " << int(c.b) << ")";
210         }
211
212         std::ostream &operator<<(std::ostream &ost, const bigcolor_t &c)
213         {
214                 return ost << "bigcolor_t(" << c.r << ", " << c.g << ", " << c.b << ")";
215         }
216
217         // 16 differences must fit in int
218         // i.e. a difference must be lower than 2^27
219
// SHRR(a, n): shift a right by n bits with rounding, done by adding half
// of the divisor (1 << (n - 1)) before shifting. n must be at least 1;
// n is evaluated twice, so pass expressions without side effects.
#define SHRR(a,n) (((a) + (1 << ((n) - 1))) >> (n))
222
223         inline int color_dist_avg(const color_t &a, const color_t &b)
224         {
225                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
226                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
227                 int db = a.b - b.b; // multiplier: 31 (-1..1)
228                 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
229         }
230
231         inline int color_dist_wavg(const color_t &a, const color_t &b)
232         {
233                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
234                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
235                 int db = a.b - b.b; // multiplier: 31 (-1..1)
236                 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
237                 // weighted 4:16:1
238         }
239
240         inline int color_dist_yuv(const color_t &a, const color_t &b)
241         {
242                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
243                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
244                 int db = a.b - b.b; // multiplier: 31 (-1..1)
245                 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
246                 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
247                 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
248                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
249                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
250                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
251         }
252
253         inline int color_dist_rgb(const color_t &a, const color_t &b)
254         {
255                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
256                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
257                 int db = a.b - b.b; // multiplier: 31 (-1..1)
258                 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
259                 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
260                 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
261                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
262                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
263                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
264         }
265
266         inline int color_dist_srgb(const color_t &a, const color_t &b)
267         {
268                 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
269                 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
270                 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
271                 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
272                 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
273                 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
274                 int sy = SHRR(y, 3) * SHRR(y, 4);
275                 int su = SHRR(u, 3) * SHRR(u, 4);
276                 int sv = SHRR(v, 3) * SHRR(v, 4);
277                 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
278                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
279                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
280         }
281
282         inline int srgb_get_y(const color_t &a)
283         {
284                 // convert to linear
285                 int r = a.r * (int) a.r;
286                 int g = a.g * (int) a.g;
287                 int b = a.b * (int) a.b;
288                 // find luminance
289                 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
290                 // square root it (!)
291                 y = sqrtf(y) + 0.5f; // now in range 0 to 3815
292                 return y;
293         }
294
295         inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
296         {
297                 // get Y
298                 int ay = srgb_get_y(a);
299                 int by = srgb_get_y(b);
300                 // get UV
301                 int au = a.r * 191 - ay;
302                 int av = a.b * 191 - ay;
303                 int bu = b.r * 191 - by;
304                 int bv = b.b * 191 - by;
305                 // get differences
306                 int y = ay - by;
307                 int u = au - bu;
308                 int v = av - bv;
309                 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
310                 // weight for u: ???
311                 // weight for v: ???
312         }
313
314         inline int color_dist_normalmap(const color_t &a, const color_t &b)
315         {
316                 float ca[3], cb[3], n;
317                 ca[0] = a.r / 31.0f * 2 - 1;
318                 ca[1] = a.g / 63.0f * 2 - 1;
319                 ca[2] = a.b / 31.0f * 2 - 1;
320                 cb[0] = b.r / 31.0f * 2 - 1;
321                 cb[1] = b.g / 63.0f * 2 - 1;
322                 cb[2] = b.b / 31.0f * 2 - 1;
323                 n = ca[0] * ca[0] + ca[1] * ca[1] + ca[2] * ca[2];
324                 if(n > 0)
325                 {
326                         n = 1.0f / sqrtf(n);
327                         ca[0] *= n;
328                         ca[1] *= n;
329                         ca[2] *= n;
330                 }
331                 n = cb[0] * cb[0] + cb[1] * cb[1] + cb[2] * cb[2];
332                 if(n > 0)
333                 {
334                         n = 1.0f / sqrtf(n);
335                         cb[0] *= n;
336                         cb[1] *= n;
337                         cb[2] *= n;
338                 }
339
340                 return
341                         100000 *
342                         (
343                                 (cb[0] - ca[0]) * (cb[0] - ca[0])
344                                 +
345                                 (cb[1] - ca[1]) * (cb[1] - ca[1])
346                                 +
347                                 (cb[2] - ca[2]) * (cb[2] - ca[2])
348                         )
349                         ;
350                 // max value: 1000 * (4 + 4 + 4) = 6000
351         }
352
353         typedef int ColorDistFunc(const color_t &a, const color_t &b);
354
// Squared difference between two 8-bit values, computed in int so the
// subtraction cannot wrap around.
inline int alpha_dist(unsigned char a, unsigned char b)
{
        const int delta = (int) a - (int) b;
        return delta * delta;
}
359
// Picks the best pair of colours out of c[0..m) and stores it in c[0]
// and c[1].
//
// n: input count - c[0..n) are the values that must be approximated
// m: total color count (including non-counted inputs) - c[0..m) are the
//    candidates a pair may be chosen from
// m >= n
// dist: callable returning the distance between two values as an int
//
// The winning pair is the one with the smallest sum, over all inputs, of
// the distance from the input to the nearer member of the pair. On ties
// the first pair found (lowest indices) wins.
template <class T, class F>
inline void reduce_colors_inplace(T *c, int n, int m, F dist)
{
        // cost[p][q]: distance between candidate p and input q
        int cost[m][n];

        // inputs against inputs: symmetric with a zero diagonal
        for(int p = 0; p < n; ++p)
        {
                cost[p][p] = 0;
                for(int q = p + 1; q < n; ++q)
                {
                        const int d = dist(c[p], c[q]);
                        cost[p][q] = d;
                        cost[q][p] = d;
                }
        }
        // remaining candidates against inputs
        for(int p = n; p < m; ++p)
                for(int q = 0; q < n; ++q)
                        cost[p][q] = dist(c[p], c[q]);

        // exhaustive search over all candidate pairs
        int first = 0;
        int second = 1;
        int lowest = -1; // negative: nothing found yet
        for(int p = 0; p < m; ++p)
        {
                for(int q = p + 1; q < m; ++q)
                {
                        int total = 0;
                        for(int k = 0; k < n; ++k)
                                total += min(cost[p][k], cost[q][k]);
                        if(lowest < 0 || total < lowest)
                        {
                                lowest = total;
                                first = p;
                                second = q;
                        }
                }
        }

        // first < second, so c[first] has to be saved before c[1] is
        // overwritten (first may be 1)
        const T keep = c[first];
        c[1] = c[second];
        c[0] = keep;
}
412         template <class T, class F>
413         inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
414         {
415                 // TODO fix this for ramp encoding!
416                 int i, j, k;
417                 int bestsum = -1;
418                 int besti = 0;
419                 int bestj = 1;
420                 int dists[m+2][n];
421                 // first the square
422                 for(i = 0; i < n; ++i)
423                 {
424                         dists[i][i] = 0;
425                         for(j = i+1; j < n; ++j)
426                         {
427                                 int d = dist(c[i], c[j]);
428                                 dists[i][j] = dists[j][i] = d;
429                         }
430                 }
431                 // then the box
432                 for(; i < m; ++i)
433                 {
434                         for(j = 0; j < n; ++j)
435                         {
436                                 int d = dist(c[i], c[j]);
437                                 dists[i][j] = d;
438                         }
439                 }
440                 // then the two extra rows
441                 for(j = 0; j < n; ++j)
442                 {
443                         int d = dist(fix0, c[j]);
444                         dists[m][j] = d;
445                 }
446                 for(j = 0; j < n; ++j)
447                 {
448                         int d = dist(fix1, c[j]);
449                         dists[m+1][j] = d;
450                 }
451                 for(i = 0; i < m; ++i)
452                         for(j = i+1; j < m; ++j)
453                         {
454                                 int sum = 0;
455                                 for(k = 0; k < n; ++k)
456                                 {
457                                         int di = dists[i][k];
458                                         int dj = dists[j][k];
459                                         int d0 = dists[m][k];
460                                         int d1 = dists[m+1][k];
461                                         int m  = min(min(di, dj), min(d0, d1));
462                                         sum += m;
463                                 }
464                                 if(bestsum < 0 || sum < bestsum)
465                                 {
466                                         bestsum = sum;
467                                         besti = i;
468                                         bestj = j;
469                                 }
470                         }
471                 if(besti != 0)
472                         c[0] = c[besti];
473                 if(bestj != 1)
474                         c[1] = c[bestj];
475         }
476
// Compression mode selector.
// NOTE(review): the code using these values lies outside this excerpt;
// presumably MODE_FAST trades quality for speed - confirm.
enum CompressionMode
{
        MODE_NORMAL = 0,
        MODE_FAST = 1
};
482
483         template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
484         {
485                 return comp;
486         }
487         template<> inline int refine_component_encode<color_dist_srgb>(int comp)
488         {
489                 return comp * comp;
490         }
491         template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
492         {
493                 return comp * comp;
494         }
495
496         template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
497         {
498                 return comp;
499         }
500         template<> inline int refine_component_decode<color_dist_srgb>(int comp)
501         {
502                 return sqrtf(comp) + 0.5f;
503         }
504         template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
505         {
506                 return sqrtf(comp) + 0.5f;
507         }
508
// Collects the values assigned to each of the two ramp endpoints during
// an encoding pass and derives refined endpoints from them.
//
// T is the value type (a colour or an alpha value), Big a type that can
// hold sums of T. Only scale_l == 1 is implemented.
template <class T, class Big, int scale_l>
struct s2tc_evaluate_colors_result_t
{
        // a possible implementation of inferred color/alpha values
        // refining would go here
};

template <class T, class Big>
struct s2tc_evaluate_colors_result_t<T, Big, 1>
{
        // uses:
        //   Big << int
        //   Big / int
        //   Big + int
        //   Big += T

        // number of values recorded for endpoint 0 / endpoint 1
        int n0, n1;
        // sum of the values recorded for endpoint 0 / endpoint 1
        Big S0, S1;

        // Starts with empty statistics.
        inline s2tc_evaluate_colors_result_t():
                n0(), n1(), S0(), S1()
        {
        }

        // Records value a for endpoint 1 if l is non-zero, else for endpoint 0.
        inline void add(int l, T a)
        {
                int &count = l ? n1 : n0;
                Big &sum = l ? S1 : S0;
                ++count;
                sum += a;
        }

        // Replaces a and b by the rounded averages of the values recorded
        // for endpoint 0 and endpoint 1. An endpoint without recorded
        // values is left untouched. Returns false, changing nothing, if
        // no value has been recorded at all.
        inline bool evaluate(T &a, T &b)
        {
                if(n0 == 0 && n1 == 0)
                        return false;
                // (2 * sum + count) / (2 * count) == sum / count, rounded
                if(n0 != 0)
                        a = ((S0 << 1) + n0) / (n0 << 1);
                if(n1 != 0)
                        b = ((S1 << 1) + n1) / (n1 << 1);
                return true;
        }
};
557
// Statistics sink that discards everything; used where an encoding pass
// must not collect refinement data.
template <class T>
struct s2tc_evaluate_colors_result_null_t
{
        // Accepts and ignores a recorded value.
        inline void add(int l, T a)
        {
                (void) l;
                (void) a;
        }
};
565
// Reads a value of type T from a 4-byte pixel. The generic version copies
// the first three bytes into the r, g and b members of T.
template<class T> T get(const unsigned char *buf)
{
        T pixel;
        pixel.r = buf[0];
        pixel.g = buf[1];
        pixel.b = buf[2];
        return pixel;
}
// For unsigned char the fourth byte, the alpha value, is returned.
template<> unsigned char get<unsigned char>(const unsigned char *buf)
{
        const unsigned char alpha = buf[3];
        return alpha;
}
578
579         template<class T, class Big, int bpp, bool have_trans, bool have_0_255, int n_input, class Dist, class Eval, class Arr>
580         inline unsigned int s2tc_try_encode_block(
581                         Arr &out,
582                         Eval &res,
583                         Dist ColorDist,
584                         const unsigned char *in, int iw, int w, int h,
585                         const T colors_ref[])
586         {
587                 unsigned int score = 0;
588                 for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
589                 {
590                         int i = y * 4 + x;
591                         const unsigned char *pix = &in[(y * iw + x) * 4];
592
593                         if(have_trans)
594                         {
595                                 if(pix[3] == 0)
596                                 {
597                                         out.do_or(i, (1 << bpp) - 1);
598                                         continue;
599                                 }
600                         }
601
602                         T color(get<T>(pix));
603                         int best = 0;
604                         int bestdist = ColorDist(color, colors_ref[0]);
605                         for(int k = 1; k < n_input; ++k)
606                         {
607                                 int dist = ColorDist(color, colors_ref[k]);
608                                 if(dist < bestdist)
609                                 {
610                                         bestdist = dist;
611                                         best = k;
612                                 }
613                         }
614                         if(have_0_255)
615                         {
616                                 int dist_0 = ColorDist(color, color_type_info<T>::min_value);
617                                 if(dist_0 <= bestdist)
618                                 {
619                                         bestdist = dist_0;
620                                         out.do_or(i, (1 << bpp) - 2);
621                                         score += bestdist;
622                                         continue;
623                                 }
624                                 int dist_255 = ColorDist(color, color_type_info<T>::max_value);
625                                 if(dist_255 <= bestdist)
626                                 {
627                                         bestdist = dist_255;
628                                         out.do_or(i, (1 << bpp) - 1);
629                                         score += bestdist;
630                                         continue;
631                                 }
632                         }
633
634                         // record
635                         res.add(best, color);
636                         out.do_or(i, best);
637                         score += bestdist;
638                 }
639                 return score;
640         }
641
642         // REFINE_LOOP: refine, take result over only if score improved, loop until it did not
643         inline void s2tc_dxt5_encode_alpha_refine_loop(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
644         {
645                 bitarray<uint64_t, 16, 3> out2;
646                 unsigned char a0next = a0, a1next = a1;
647                 unsigned int s = 0x7FFFFFFF;
648                 for(;;)
649                 {
650                         unsigned char ramp[2] = {
651                                 a0next,
652                                 a1next
653                         };
654                         s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
655                         unsigned int s2 = s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out2, r2, alpha_dist, in, iw, w, h, ramp);
656                         if(s2 < s)
657                         {
658                                 out = out2;
659                                 s = s2;
660                                 a0 = a0next;
661                                 a1 = a1next;
662                                 if(!r2.evaluate(a0next, a1next))
663                                         break;
664                         }
665                         else
666                                 break;
667                         out2.clear();
668                 }
669
670                 if(a1 == a0)
671                 {
672                         if(a0 == 255)
673                                 --a1;
674                         else
675                                 ++a1;
676                         for(int i = 0; i < 16; ++i) switch(out.get(i))
677                         {
678                                 case 1:
679                                         out.set(i, 0);
680                                         break;
681                         }
682                 }
683
684                 if(a1 < a0)
685                 {
686                         std::swap(a0, a1);
687                         for(int i = 0; i < 16; ++i) switch(out.get(i))
688                         {
689                                 case 0:
690                                         out.set(i, 1);
691                                         break;
692                                 case 1:
693                                         out.set(i, 0);
694                                         break;
695                                 case 6:
696                                 case 7:
697                                         break;
698                                 default:
699                                         out.set(i, 7 - out.get(i));
700                                         break;
701                         }
702                 }
703         }
704
705         // REFINE_ALWAYS: refine, do not check
706         inline void s2tc_dxt5_encode_alpha_refine_always(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
707         {
708                 unsigned char ramp[2] = {
709                         a0,
710                         a1
711                 };
712                 s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
713                 s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out, r2, alpha_dist, in, iw, w, h, ramp);
714                 r2.evaluate(a0, a1);
715
716                 if(a1 == a0)
717                 {
718                         if(a0 == 255)
719                                 --a1;
720                         else
721                                 ++a1;
722                         for(int i = 0; i < 16; ++i) switch(out.get(i))
723                         {
724                                 case 1:
725                                         out.set(i, 0);
726                                         break;
727                         }
728                 }
729
730                 if(a1 < a0)
731                 {
732                         std::swap(a0, a1);
733                         for(int i = 0; i < 16; ++i) switch(out.get(i))
734                         {
735                                 case 0:
736                                         out.set(i, 1);
737                                         break;
738                                 case 1:
739                                         out.set(i, 0);
740                                         break;
741                                 case 6:
742                                 case 7:
743                                         break;
744                                 default:
745                                         out.set(i, 7 - out.get(i));
746                                         break;
747                         }
748                 }
749         }
750
751         // REFINE_NEVER: do not refine
752         inline void s2tc_dxt5_encode_alpha_refine_never(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
753         {
754                 if(a1 < a0)
755                         std::swap(a0, a1);
756                 unsigned char ramp[6] = {
757                         a0,
758                         a1
759                 };
760                 s2tc_evaluate_colors_result_null_t<unsigned char> r2;
761                 s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out, r2, alpha_dist, in, iw, w, h, ramp);
762         }
763
764         // REFINE_LOOP: refine, take result over only if score improved, loop until it did not
765         template<ColorDistFunc ColorDist, bool have_trans>
766         inline void s2tc_dxt1_encode_color_refine_loop(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
767         {
768                 bitarray<uint32_t, 16, 2> out2;
769                 color_t c0next = c0, c1next = c1;
770                 unsigned int s = 0x7FFFFFFF;
771                 for(;;)
772                 {
773                         color_t ramp[2] = {
774                                 c0next,
775                                 c1next
776                         };
777                         s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
778                         unsigned int s2 = s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out2, r2, ColorDist, in, iw, w, h, ramp);
779                         if(s2 < s)
780                         {
781                                 out = out2;
782                                 s = s2;
783                                 c0 = c0next;
784                                 c1 = c1next;
785                                 if(!r2.evaluate(c0next, c1next))
786                                         break;
787                         }
788                         else
789                                 break;
790                         out2.clear();
791                 }
792
793                 if(c0 == c1)
794                 {
795                         if(c0 == color_type_info<color_t>::max_value)
796                                 --c1;
797                         else
798                                 ++c1;
799                         for(int i = 0; i < 16; ++i)
800                                 if(!(out.get(i) == 1))
801                                         out.set(i, 0);
802                 }
803
804                 if(have_trans ? c1 < c0 : c0 < c1)
805                 {
806                         std::swap(c0, c1);
807                         for(int i = 0; i < 16; ++i)
808                                 if(!(out.get(i) & 2))
809                                         out.do_xor(i, 1);
810                 }
811         }
812
813         // REFINE_ALWAYS: refine, do not check
814         template<ColorDistFunc ColorDist, bool have_trans>
815         inline void s2tc_dxt1_encode_color_refine_always(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
816         {
817                 color_t ramp[2] = {
818                         c0,
819                         c1
820                 };
821                 s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
822                 s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
823                 r2.evaluate(c0, c1);
824
825                 if(c0 == c1)
826                 {
827                         if(c0 == color_type_info<color_t>::max_value)
828                                 --c1;
829                         else
830                                 ++c1;
831                         for(int i = 0; i < 16; ++i)
832                                 if(!(out.get(i) == 1))
833                                         out.set(i, 0);
834                 }
835
836                 if(have_trans ? c1 < c0 : c0 < c1)
837                 {
838                         std::swap(c0, c1);
839                         for(int i = 0; i < 16; ++i)
840                                 if(!(out.get(i) & 2))
841                                         out.do_xor(i, 1);
842                 }
843         }
844
845         // REFINE_NEVER: do not refine
846         template<ColorDistFunc ColorDist, bool have_trans>
847         inline void s2tc_dxt1_encode_color_refine_never(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
848         {
849                 if(have_trans ? c1 < c0 : c0 < c1)
850                         std::swap(c0, c1);
851                 color_t ramp[2] = {
852                         c0,
853                         c1
854                 };
855                 s2tc_evaluate_colors_result_null_t<color_t> r2;
856                 s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
857         }
858
859         inline void s2tc_dxt3_encode_alpha(bitarray<uint64_t, 16, 4> &out, const unsigned char *in, int iw, int w, int h)
860         {
861                 for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
862                 {
863                         int i = y * 4 + x;
864                         const unsigned char *pix = &in[(y * iw + x) * 4];
865                         out.do_or(i, pix[3]);
866                 }
867         }
868
869         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
870         inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
871         {
872                 color_t c[16 + (nrandom >= 0 ? nrandom : 0)];
873                 unsigned char ca[16 + (nrandom >= 0 ? nrandom : 0)];
874                 int x, y;
875
876                 if(mode == MODE_FAST)
877                 {
878                         // FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these
879
880                         color_t c0 = make_color_t(0, 0, 0);
881
882                         // dummy values because we don't know whether the first pixel will write
883                         c[0].r = 31;
884                         c[0].g = 63;
885                         c[0].b = 31;
886                         c[1].r = 0;
887                         c[1].g = 0;
888                         c[1].b = 0;
889                         int dmin = 0x7FFFFFFF;
890                         int dmax = 0;
891                         if(dxt == DXT5)
892                         {
893                                 ca[0] = rgba[3];
894                                 ca[1] = ca[0];
895                         }
896
897                         for(x = 0; x < w; ++x)
898                                 for(y = 0; y < h; ++y)
899                                 {
900                                         c[2].r = rgba[(x + y * iw) * 4 + 0];
901                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
902                                         c[2].b = rgba[(x + y * iw) * 4 + 2];
903                                         ca[2]  = rgba[(x + y * iw) * 4 + 3];
904                                         if (dxt == DXT1)
905                                                 if(ca[2] == 0)
906                                                         continue;
907                                         // MODE_FAST doesn't work for normalmaps, so this works
908
909                                         int d = ColorDist(c[2], c0);
910                                         if(d > dmax)
911                                         {
912                                                 dmax = d;
913                                                 c[1] = c[2];
914                                         }
915                                         if(d < dmin)
916                                         {
917                                                 dmin = d;
918                                                 c[0] = c[2];
919                                         }
920
921                                         if(dxt == DXT5)
922                                         {
923                                                 if(ca[2] != 255)
924                                                 {
925                                                         if(ca[2] > ca[1])
926                                                                 ca[1] = ca[2];
927                                                         if(ca[2] < ca[0])
928                                                                 ca[0] = ca[2];
929                                                 }
930                                         }
931                                 }
932                 }
933                 else
934                 {
935                         int n = 0, m = 0;
936
937                         for(x = 0; x < w; ++x)
938                                 for(y = 0; y < h; ++y)
939                                 {
940                                         c[n].r = rgba[(x + y * iw) * 4 + 0];
941                                         c[n].g = rgba[(x + y * iw) * 4 + 1];
942                                         c[n].b = rgba[(x + y * iw) * 4 + 2];
943                                         ca[n]  = rgba[(x + y * iw) * 4 + 3];
944                                         if (dxt == DXT1)
945                                                 if(ca[n] == 0)
946                                                         continue;
947                                         ++n;
948                                 }
949                         if(n == 0)
950                         {
951                                 n = 1;
952                                 c[0].r = 0;
953                                 c[0].g = 0;
954                                 c[0].b = 0;
955                                 ca[0] = 0;
956                         }
957                         m = n;
958
959                         if(nrandom > 0)
960                         {
961                                 color_t mins = c[0];
962                                 color_t maxs = c[0];
963                                 unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
964                                 unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
965                                 for(x = 1; x < n; ++x)
966                                 {
967                                         mins.r = min(mins.r, c[x].r);
968                                         mins.g = min(mins.g, c[x].g);
969                                         mins.b = min(mins.b, c[x].b);
970                                         maxs.r = max(maxs.r, c[x].r);
971                                         maxs.g = max(maxs.g, c[x].g);
972                                         maxs.b = max(maxs.b, c[x].b);
973                                         if(dxt == DXT5)
974                                         {
975                                                 mina = min(mina, ca[x]);
976                                                 maxa = max(maxa, ca[x]);
977                                         }
978                                 }
979                                 color_t len = make_color_t(maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1);
980                                 int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
981                                 for(x = 0; x < nrandom; ++x)
982                                 {
983                                         c[m].r = mins.r + rand() % len.r;
984                                         c[m].g = mins.g + rand() % len.g;
985                                         c[m].b = mins.b + rand() % len.b;
986                                         if(dxt == DXT5)
987                                                 ca[m] = mina + rand() % lena;
988                                         ++m;
989                                 }
990                         }
991                         else
992                         {
993                                 // hack for last miplevel
994                                 if(n == 1)
995                                 {
996                                         c[1] = c[0];
997                                         m = n = 2;
998                                 }
999                         }
1000
1001                         reduce_colors_inplace(c, n, m, ColorDist);
1002                         if(dxt == DXT5)
1003                                 reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
1004                 }
1005
1006                 // equal colors are BAD
1007                 if(c[0] == c[1])
1008                 {
1009                         if(c[0] == color_type_info<color_t>::max_value)
1010                                 --c[1];
1011                         else
1012                                 ++c[1];
1013                 }
1014
1015                 if(dxt == DXT5)
1016                 {
1017                         if(ca[0] == ca[1])
1018                         {
1019                                 if(ca[0] == 255)
1020                                         --ca[1];
1021                                 else
1022                                         ++ca[1];
1023                         }
1024                 }
1025
1026                 switch(dxt)
1027                 {
1028                         case DXT1:
1029                                 {
1030                                         bitarray<uint32_t, 16, 2> colorblock;
1031                                         switch(refine)
1032                                         {
1033                                                 case REFINE_NEVER:
1034                                                         s2tc_dxt1_encode_color_refine_never<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
1035                                                         break;
1036                                                 case REFINE_ALWAYS:
1037                                                         s2tc_dxt1_encode_color_refine_always<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
1038                                                         break;
1039                                                 case REFINE_LOOP:
1040                                                         s2tc_dxt1_encode_color_refine_loop<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
1041                                                         break;
1042                                         }
1043                                         out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
1044                                         out[1] = (c[0].r << 3) | (c[0].g >> 3);
1045                                         out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
1046                                         out[3] = (c[1].r << 3) | (c[1].g >> 3);
1047                                         colorblock.tobytes(&out[4]);
1048                                 }
1049                                 break;
1050                         case DXT3:
1051                                 {
1052                                         bitarray<uint32_t, 16, 2> colorblock;
1053                                         bitarray<uint64_t, 16, 4> alphablock;
1054                                         switch(refine)
1055                                         {
1056                                                 case REFINE_NEVER:
1057                                                         s2tc_dxt1_encode_color_refine_never<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1058                                                         break;
1059                                                 case REFINE_ALWAYS:
1060                                                         s2tc_dxt1_encode_color_refine_always<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1061                                                         break;
1062                                                 case REFINE_LOOP:
1063                                                         s2tc_dxt1_encode_color_refine_loop<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1064                                                         break;
1065                                         }
1066                                         s2tc_dxt3_encode_alpha(alphablock, rgba, iw, w, h);
1067                                         alphablock.tobytes(&out[0]);
1068                                         out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
1069                                         out[9] = (c[0].r << 3) | (c[0].g >> 3);
1070                                         out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
1071                                         out[11] = (c[1].r << 3) | (c[1].g >> 3);
1072                                         colorblock.tobytes(&out[12]);
1073                                 }
1074                                 break;
1075                         case DXT5:
1076                                 {
1077                                         bitarray<uint32_t, 16, 2> colorblock;
1078                                         bitarray<uint64_t, 16, 3> alphablock;
1079                                         switch(refine)
1080                                         {
1081                                                 case REFINE_NEVER:
1082                                                         s2tc_dxt1_encode_color_refine_never<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1083                                                         s2tc_dxt5_encode_alpha_refine_never(alphablock, rgba, iw, w, h, ca[0], ca[1]);
1084                                                         break;
1085                                                 case REFINE_ALWAYS:
1086                                                         s2tc_dxt1_encode_color_refine_always<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1087                                                         s2tc_dxt5_encode_alpha_refine_always(alphablock, rgba, iw, w, h, ca[0], ca[1]);
1088                                                         break;
1089                                                 case REFINE_LOOP:
1090                                                         s2tc_dxt1_encode_color_refine_loop<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1091                                                         s2tc_dxt5_encode_alpha_refine_loop(alphablock, rgba, iw, w, h, ca[0], ca[1]);
1092                                                         break;
1093                                         }
1094                                         out[0] = ca[0];
1095                                         out[1] = ca[1];
1096                                         alphablock.tobytes(&out[2]);
1097                                         out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
1098                                         out[9] = (c[0].r << 3) | (c[0].g >> 3);
1099                                         out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
1100                                         out[11] = (c[1].r << 3) | (c[1].g >> 3);
1101                                         colorblock.tobytes(&out[12]);
1102                                 }
1103                                 break;
1104                 }
1105         }
1106
1107         // compile time dispatch magic
1108         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
1109         inline s2tc_encode_block_func_t s2tc_encode_block_func(RefinementMode refine)
1110         {
1111                 switch(refine)
1112                 {
1113                         case REFINE_NEVER:
1114                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_NEVER>;
1115                         case REFINE_LOOP:
1116                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_LOOP>;
1117                         default:
1118                         case REFINE_ALWAYS:
1119                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_ALWAYS>;
1120                 }
1121         }
1122
1123         // these color dist functions do not need the refinement check, as they always improve the situation
1124         template<ColorDistFunc ColorDist> struct supports_fast
1125         {
1126                 static const bool value = true;
1127         };
1128         template<> struct supports_fast<color_dist_normalmap>
1129         {
1130                 static const bool value = false;
1131         };
1132
1133         template<DxtMode dxt, ColorDistFunc ColorDist>
1134         inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, RefinementMode refine)
1135         {
1136                 if(!supports_fast<ColorDist>::value || nrandom >= 0)
1137                         return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
1138                 else
1139                         return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
1140         }
1141
1142         template<ColorDistFunc ColorDist>
1143         inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, RefinementMode refine)
1144         {
1145                 switch(dxt)
1146                 {
1147                         case DXT1:
1148                                 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
1149                                 break;
1150                         case DXT3:
1151                                 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
1152                                 break;
1153                         default:
1154                         case DXT5:
1155                                 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
1156                                 break;
1157                 }
1158         }
1159 };
1160
1161 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, RefinementMode refine)
1162 {
1163         switch(cd)
1164         {
1165                 case RGB:
1166                         return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
1167                         break;
1168                 case YUV:
1169                         return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
1170                         break;
1171                 case SRGB:
1172                         return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
1173                         break;
1174                 case SRGB_MIXED:
1175                         return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
1176                         break;
1177                 case AVG:
1178                         return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
1179                         break;
1180                 default:
1181                 case WAVG:
1182                         return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
1183                         break;
1184                 case NORMALMAP:
1185                         return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
1186                         break;
1187         }
1188 }
1189
1190 namespace
1191 {
1192         inline int diffuse(int *diff, int src, int shift)
1193         {
1194                 const int maxval = (1 << (8 - shift)) - 1;
1195                 src += *diff;
1196                 int ret = max(0, min(src >> shift, maxval));
1197                 // simulate decoding ("loop filter")
1198                 int loop = (ret << shift) | (ret >> (8 - 2 * shift));
1199                 *diff = src - loop;
1200                 return ret;
1201         }
1202         inline int diffuse1(int *diff, int src)
1203         {
1204                 src += *diff;
1205                 int ret = (src >= 128);
1206                 // simulate decoding ("loop filter")
1207                 int loop = ret ? 255 : 0;
1208                 *diff = src - loop;
1209                 return ret;
1210         }
1211
1212         inline int floyd(int *thisrow, int *downrow, int src, int shift)
1213         {
1214                 const int maxval = (1 << (8 - shift)) - 1;
1215                 src = (src << 4) | (src >> 4);
1216                 src += thisrow[1];
1217                 int ret = max(0, min(src >> (shift + 4), maxval));
1218                 // simulate decoding ("loop filter")
1219                 int loop = (ret * 4095 / maxval);
1220                 int err = src - loop;
1221                 int e7 = (err * 7 + 8) / 16;
1222                 err -= e7;
1223                 int e3 = (err * 3 + 4) / 9;
1224                 err -= e3;
1225                 int e5 = (err * 5 + 3) / 6;
1226                 err -= e5;
1227                 int e1 = err;
1228                 thisrow[2] += e7;
1229                 downrow[0] += e3;
1230                 downrow[1] += e5;
1231                 downrow[2] += e1;
1232                 return ret;
1233         }
1234
1235         inline int floyd1(int *thisrow, int *downrow, int src)
1236         {
1237                 src = (src << 4) | (src >> 4);
1238                 src += thisrow[1];
1239                 int ret = (src >= 2048);
1240                 // simulate decoding ("loop filter")
1241                 int loop = ret ? 4095 : 0;
1242                 int err = src - loop;
1243                 int e7 = (err * 7 + 8) / 16;
1244                 err -= e7;
1245                 int e3 = (err * 3 + 4) / 9;
1246                 err -= e3;
1247                 int e5 = (err * 5 + 3) / 6;
1248                 err -= e5;
1249                 int e1 = err;
1250                 thisrow[2] += e7;
1251                 downrow[0] += e3;
1252                 downrow[1] += e5;
1253                 downrow[2] += e1;
1254                 return ret;
1255         }
1256
1257         template<int srccomps, int alphabits, DitherMode dither>
1258         inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h)
1259         {
1260                 int x, y;
1261                 switch(dither)
1262                 {
1263                         case DITHER_NONE:
1264                                 {
1265                                         for(y = 0; y < h; ++y)
1266                                                 for(x = 0; x < w; ++x)
1267                                                 {
1268                                                         out[(x + y * w) * 4 + 0] = rgba[(x + y * w) * srccomps + 0] >> 3;
1269                                                         out[(x + y * w) * 4 + 1] = rgba[(x + y * w) * srccomps + 1] >> 2;
1270                                                         out[(x + y * w) * 4 + 2] = rgba[(x + y * w) * srccomps + 2] >> 3;
1271                                                 }
1272                                         if(srccomps == 4)
1273                                         {
1274                                                 if(alphabits == 1)
1275                                                 {
1276                                                         for(y = 0; y < h; ++y)
1277                                                                 for(x = 0; x < w; ++x)
1278                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> 7;
1279                                                 }
1280                                                 else if(alphabits == 8)
1281                                                 {
1282                                                         for(y = 0; y < h; ++y)
1283                                                                 for(x = 0; x < w; ++x)
1284                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1285                                                 }
1286                                                 else
1287                                                 {
1288                                                         for(y = 0; y < h; ++y)
1289                                                                 for(x = 0; x < w; ++x)
1290                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> (8 - alphabits);
1291                                                 }
1292                                         }
1293                                         else
1294                                         {
1295                                                 for(y = 0; y < h; ++y)
1296                                                         for(x = 0; x < w; ++x)
1297                                                                 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1298                                         }
1299                                 }
1300                                 break;
1301                         case DITHER_SIMPLE:
1302                                 {
1303                                         int x, y;
1304                                         int diffuse_r = 0;
1305                                         int diffuse_g = 0;
1306                                         int diffuse_b = 0;
1307                                         int diffuse_a = 0;
1308                                         for(y = 0; y < h; ++y)
1309                                                 for(x = 0; x < w; ++x)
1310                                                 {
1311                                                         out[(x + y * w) * 4 + 0] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
1312                                                         out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1313                                                         out[(x + y * w) * 4 + 2] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
1314                                                 }
1315                                         if(srccomps == 4)
1316                                         {
1317                                                 if(alphabits == 1)
1318                                                 {
1319                                                         for(y = 0; y < h; ++y)
1320                                                                 for(x = 0; x < w; ++x)
1321                                                                         out[(x + y * w) * 4 + 3] = diffuse1(&diffuse_a, rgba[(x + y * w) * srccomps + 3]);
1322                                                 }
1323                                                 else if(alphabits == 8)
1324                                                 {
1325                                                         for(y = 0; y < h; ++y)
1326                                                                 for(x = 0; x < w; ++x)
1327                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1328                                                 }
1329                                                 else
1330                                                 {
1331                                                         for(y = 0; y < h; ++y)
1332                                                                 for(x = 0; x < w; ++x)
1333                                                                         out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1334                                                 }
1335                                         }
1336                                         else
1337                                         {
1338                                                 for(y = 0; y < h; ++y)
1339                                                         for(x = 0; x < w; ++x)
1340                                                                 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1341                                         }
1342                                 }
1343                                 break;
1344                         case DITHER_FLOYDSTEINBERG:
1345                                 {
1346                                         int x, y;
1347                                         int pw = w+2;
1348                                         int downrow[6*pw];
1349                                         memset(downrow, 0, sizeof(downrow));
1350                                         int *thisrow_r, *thisrow_g, *thisrow_b, *thisrow_a;
1351                                         int *downrow_r, *downrow_g, *downrow_b, *downrow_a;
1352                                         for(y = 0; y < h; ++y)
1353                                         {
1354                                                 thisrow_r = downrow + ((y&1)?3:0) * pw;
1355                                                 downrow_r = downrow + ((y&1)?0:3) * pw;
1356                                                 memset(downrow_r, 0, sizeof(*downrow_r) * (3*pw));
1357                                                 thisrow_g = thisrow_r + pw;
1358                                                 thisrow_b = thisrow_g + pw;
1359                                                 downrow_g = downrow_r + pw;
1360                                                 downrow_b = downrow_g + pw;
1361                                                 for(x = 0; x < w; ++x)
1362                                                 {
1363                                                         out[(x + y * w) * 4 + 0] = floyd(&thisrow_r[x], &downrow_r[x], rgba[(x + y * w) * srccomps + 0], 3);
1364                                                         out[(x + y * w) * 4 + 1] = floyd(&thisrow_g[x], &downrow_g[x], rgba[(x + y * w) * srccomps + 1], 2);
1365                                                         out[(x + y * w) * 4 + 2] = floyd(&thisrow_b[x], &downrow_b[x], rgba[(x + y * w) * srccomps + 2], 3);
1366                                                 }
1367                                         }
1368                                         if(srccomps == 4)
1369                                         {
1370                                                 if(alphabits == 1)
1371                                                 {
1372                                                         for(y = 0; y < h; ++y)
1373                                                         {
1374                                                                 thisrow_a = downrow + (y&1) * pw;
1375                                                                 downrow_a = downrow + !(y&1) * pw;
1376                                                                 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1377                                                                 for(x = 0; x < w; ++x)
1378                                                                         out[(x + y * w) * 4 + 3] = floyd1(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3]);
1379                                                         }
1380                                                 }
1381                                                 else if(alphabits == 8)
1382                                                 {
1383                                                         for(y = 0; y < h; ++y)
1384                                                                 for(x = 0; x < w; ++x)
1385                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1386                                                 }
1387                                                 else
1388                                                 {
1389                                                         for(y = 0; y < h; ++y)
1390                                                         {
1391                                                                 thisrow_a = downrow + (y&1) * pw;
1392                                                                 downrow_a = downrow + !(y&1) * pw;
1393                                                                 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1394                                                                 for(x = 0; x < w; ++x)
1395                                                                         out[(x + y * w) * 4 + 3] = floyd(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1396                                                         }
1397                                                 }
1398                                         }
1399                                         else
1400                                         {
1401                                                 for(y = 0; y < h; ++y)
1402                                                         for(x = 0; x < w; ++x)
1403                                                                 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1404                                         }
1405                                 }
1406                                 break;
1407                 }
1408         }
1409
1410         template<int srccomps, int alphabits>
1411         inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, DitherMode dither)
1412         {
1413                 switch(dither)
1414                 {
1415                         case DITHER_NONE:
1416                                 rgb565_image<srccomps, alphabits, DITHER_NONE>(out, rgba, w, h);
1417                                 break;
1418                         default:
1419                         case DITHER_SIMPLE:
1420                                 rgb565_image<srccomps, alphabits, DITHER_SIMPLE>(out, rgba, w, h);
1421                                 break;
1422                         case DITHER_FLOYDSTEINBERG:
1423                                 rgb565_image<srccomps, alphabits, DITHER_FLOYDSTEINBERG>(out, rgba, w, h);
1424                                 break;
1425                 }
1426         }
1427
1428         template<int srccomps>
1429         inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int alphabits, DitherMode dither)
1430         {
1431                 switch(alphabits)
1432                 {
1433                         case 1:
1434                                 rgb565_image<srccomps, 1>(out, rgba, w, h, dither);
1435                                 break;
1436                         case 4:
1437                                 rgb565_image<srccomps, 4>(out, rgba, w, h, dither);
1438                                 break;
1439                         default:
1440                         case 8:
1441                                 rgb565_image<srccomps, 8>(out, rgba, w, h, dither);
1442                                 break;
1443                 }
1444         }
1445 };
1446
1447 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int alphabits, DitherMode dither)
1448 {
1449         switch(srccomps)
1450         {
1451                 case 3:
1452                         rgb565_image<3>(out, rgba, w, h, alphabits, dither);
1453                         break;
1454                 case 4:
1455                 default:
1456                         rgb565_image<4>(out, rgba, w, h, alphabits, dither);
1457                         break;
1458         }
1459 }