OSDN Git Service

make the refining step optional
[android-x86/external-s2tc.git] / s2tc_compressor.cpp
1 #include <math.h>
2 #include <stdlib.h>
3 #include <string.h>
4
5 #include "s2tc_compressor.h"
6 #include "s2tc_common.h"
7
8 namespace
9 {
10         typedef struct
11         {
12                 signed char r, g, b;
13         }
14         color_t;
15
16         inline bool operator<(const color_t &a, const color_t &b)
17         {
18                 signed char d;
19                 d = a.r - b.r;
20                 if(d)
21                         return d < 0;
22                 d = a.g - b.g;
23                 if(d)
24                         return d < 0;
25                 d = a.b - b.b;
26                 return d < 0;
27         }
28         // 16 differences must fit in int
29         // i.e. a difference must be lower than 2^27
30
31         // shift right, rounded
32 #define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
33
34         inline int color_dist_avg(const color_t &a, const color_t &b)
35         {
36                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
37                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
38                 int db = a.b - b.b; // multiplier: 31 (-1..1)
39                 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
40         }
41
42         inline int color_dist_wavg(const color_t &a, const color_t &b)
43         {
44                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
45                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
46                 int db = a.b - b.b; // multiplier: 31 (-1..1)
47                 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
48                 // weighted 4:16:1
49         }
50
51         inline int color_dist_yuv(const color_t &a, const color_t &b)
52         {
53                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
54                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
55                 int db = a.b - b.b; // multiplier: 31 (-1..1)
56                 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
57                 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
58                 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
59                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
60                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
61                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
62         }
63
64         inline int color_dist_rgb(const color_t &a, const color_t &b)
65         {
66                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
67                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
68                 int db = a.b - b.b; // multiplier: 31 (-1..1)
69                 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
70                 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
71                 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
72                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
73                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
74                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
75         }
76
77         inline int color_dist_srgb(const color_t &a, const color_t &b)
78         {
79                 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
80                 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
81                 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
82                 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
83                 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
84                 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
85                 int sy = SHRR(y, 3) * SHRR(y, 4);
86                 int su = SHRR(u, 3) * SHRR(u, 4);
87                 int sv = SHRR(v, 3) * SHRR(v, 4);
88                 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
89                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
90                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
91         }
92
93         inline int srgb_get_y(const color_t &a)
94         {
95                 // convert to linear
96                 int r = a.r * (int) a.r;
97                 int g = a.g * (int) a.g;
98                 int b = a.b * (int) a.b;
99                 // find luminance
100                 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
101                 // square root it (!)
102                 y = sqrt(y); // now in range 0 to 3815
103                 return y;
104         }
105
106         inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
107         {
108                 // get Y
109                 int ay = srgb_get_y(a);
110                 int by = srgb_get_y(b);
111                 // get UV
112                 int au = a.r * 191 - ay;
113                 int av = a.b * 191 - ay;
114                 int bu = b.r * 191 - by;
115                 int bv = b.b * 191 - by;
116                 // get differences
117                 int y = ay - by;
118                 int u = au - bu;
119                 int v = av - bv;
120                 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
121                 // weight for u: ???
122                 // weight for v: ???
123         }
124
125         // FIXME this is likely broken
126         inline int color_dist_lab_srgb(const color_t &a, const color_t &b)
127         {
128                 // undo sRGB
129                 float ar = powf(a.r / 31.0f, 2.4f);
130                 float ag = powf(a.g / 63.0f, 2.4f);
131                 float ab = powf(a.b / 31.0f, 2.4f);
132                 float br = powf(b.r / 31.0f, 2.4f);
133                 float bg = powf(b.g / 63.0f, 2.4f);
134                 float bb = powf(b.b / 31.0f, 2.4f);
135                 // convert to CIE XYZ
136                 float aX = 0.4124f * ar + 0.3576f * ag + 0.1805f * ab;
137                 float aY = 0.2126f * ar + 0.7152f * ag + 0.0722f * ab;
138                 float aZ = 0.0193f * ar + 0.1192f * ag + 0.9505f * ab;
139                 float bX = 0.4124f * br + 0.3576f * bg + 0.1805f * bb;
140                 float bY = 0.2126f * br + 0.7152f * bg + 0.0722f * bb;
141                 float bZ = 0.0193f * br + 0.1192f * bg + 0.9505f * bb;
142                 // convert to CIE Lab
143                 float Xn = 0.3127f;
144                 float Yn = 0.3290f;
145                 float Zn = 0.3583f;
146                 float aL = 116 * cbrtf(aY / Yn) - 16;
147                 float aA = 500 * (cbrtf(aX / Xn) - cbrtf(aY / Yn));
148                 float aB = 200 * (cbrtf(aY / Yn) - cbrtf(aZ / Zn));
149                 float bL = 116 * cbrtf(bY / Yn) - 16;
150                 float bA = 500 * (cbrtf(bX / Xn) - cbrtf(bY / Yn));
151                 float bB = 200 * (cbrtf(bY / Yn) - cbrtf(bZ / Zn));
152                 // euclidean distance, but moving weight away from A and B
153                 return 1000 * ((aL - bL) * (aL - bL) + (aA - bA) * (aA - bA) + (aB - bB) * (aB - bB));
154         }
155
156         inline int color_dist_normalmap(const color_t &a, const color_t &b)
157         {
158                 float ca[3], cb[3];
159                 ca[0] = a.r / 31.0 * 2 - 1;
160                 ca[1] = a.g / 63.0 * 2 - 1;
161                 ca[2] = a.b / 31.0 * 2 - 1;
162                 cb[0] = b.r / 31.0 * 2 - 1;
163                 cb[1] = b.g / 63.0 * 2 - 1;
164                 cb[2] = b.b / 31.0 * 2 - 1;
165
166                 return
167                         500 *
168                         (
169                                 (cb[0] - ca[0]) * (cb[0] - ca[0])
170                                 +
171                                 (cb[1] - ca[1]) * (cb[1] - ca[1])
172                                 +
173                                 (cb[2] - ca[2]) * (cb[2] - ca[2])
174                         )
175                         ;
176                 // max value: 500 * (4 + 4 + 4) = 6000
177         }
178
179         typedef int ColorDistFunc(const color_t &a, const color_t &b);
180
181         inline int alpha_dist(unsigned char a, unsigned char b)
182         {
183                 return (a - (int) b) * (a - (int) b);
184         }
185
186         template <class T, class F>
187         // n: input count
188         // m: total color count (including non-counted inputs)
189         // m >= n
190         inline void reduce_colors_inplace(T *c, int n, int m, F dist)
191         {
192                 int i, j, k;
193                 int bestsum = -1;
194                 int besti = 0;
195                 int bestj = 1;
196                 int dists[m][n];
197                 // first the square
198                 for(i = 0; i < n; ++i)
199                 {
200                         dists[i][i] = 0;
201                         for(j = i+1; j < n; ++j)
202                         {
203                                 int d = dist(c[i], c[j]);
204                                 dists[i][j] = dists[j][i] = d;
205                         }
206                 }
207                 // then the box
208                 for(; i < m; ++i)
209                 {
210                         for(j = 0; j < n; ++j)
211                         {
212                                 int d = dist(c[i], c[j]);
213                                 dists[i][j] = d;
214                         }
215                 }
216                 for(i = 0; i < m; ++i)
217                         for(j = i+1; j < m; ++j)
218                         {
219                                 int sum = 0;
220                                 for(k = 0; k < n; ++k)
221                                 {
222                                         int di = dists[i][k];
223                                         int dj = dists[j][k];
224                                         int m  = min(di, dj);
225                                         sum += m;
226                                 }
227                                 if(bestsum < 0 || sum < bestsum)
228                                 {
229                                         bestsum = sum;
230                                         besti = i;
231                                         bestj = j;
232                                 }
233                         }
234                 if(besti != 0)
235                         c[0] = c[besti];
236                 if(bestj != 1)
237                         c[1] = c[bestj];
238         }
239         template <class T, class F>
240         inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
241         {
242                 int i, j, k;
243                 int bestsum = -1;
244                 int besti = 0;
245                 int bestj = 1;
246                 int dists[m+2][n];
247                 // first the square
248                 for(i = 0; i < n; ++i)
249                 {
250                         dists[i][i] = 0;
251                         for(j = i+1; j < n; ++j)
252                         {
253                                 int d = dist(c[i], c[j]);
254                                 dists[i][j] = dists[j][i] = d;
255                         }
256                 }
257                 // then the box
258                 for(; i < m; ++i)
259                 {
260                         for(j = 0; j < n; ++j)
261                         {
262                                 int d = dist(c[i], c[j]);
263                                 dists[i][j] = d;
264                         }
265                 }
266                 // then the two extra rows
267                 for(j = 0; j < n; ++j)
268                 {
269                         int d = dist(fix0, c[j]);
270                         dists[m][j] = d;
271                 }
272                 for(j = 0; j < n; ++j)
273                 {
274                         int d = dist(fix1, c[j]);
275                         dists[m+1][j] = d;
276                 }
277                 for(i = 0; i < m; ++i)
278                         for(j = i+1; j < m; ++j)
279                         {
280                                 int sum = 0;
281                                 for(k = 0; k < n; ++k)
282                                 {
283                                         int di = dists[i][k];
284                                         int dj = dists[j][k];
285                                         int d0 = dists[m][k];
286                                         int d1 = dists[m+1][k];
287                                         int m  = min(min(di, dj), min(d0, d1));
288                                         sum += m;
289                                 }
290                                 if(bestsum < 0 || sum < bestsum)
291                                 {
292                                         bestsum = sum;
293                                         besti = i;
294                                         bestj = j;
295                                 }
296                         }
297                 if(besti != 0)
298                         c[0] = c[besti];
299                 if(bestj != 1)
300                         c[1] = c[bestj];
301         }
302
303         enum CompressionMode
304         {
305                 MODE_NORMAL,
306                 MODE_RANDOM,
307                 MODE_FAST
308         };
309
310         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, bool refine>
311         inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
312         {
313                 color_t c[16 + (mode == MODE_RANDOM ? nrandom : 0)];
314
315                 unsigned char ca[16];
316                 int n = 0, m = 0;
317                 int x, y;
318
319                 if(mode == MODE_FAST)
320                 {
321                         color_t c0 = {0, 0, 0};
322
323                         c[0].r = rgba[2];
324                         c[0].g = rgba[1];
325                         c[0].b = rgba[0];
326                         c[1] = c[0];
327                         int dmin = ColorDist(c[0], c0);
328                         int dmax = dmin;
329                         if(dxt == DXT5)
330                         {
331                                 ca[0] = rgba[3];
332                                 ca[1] = ca[0];
333                         }
334
335                         for(x = 0; x < w; ++x)
336                                 for(y = !x; y < h; ++y)
337                                 {
338                                         c[2].r = rgba[(x + y * iw) * 4 + 2];
339                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
340                                         c[2].b = rgba[(x + y * iw) * 4 + 0];
341
342                                         int d = ColorDist(c[2], c0);
343                                         if(d > dmax)
344                                         {
345                                                 dmax = d;
346                                                 c[1] = c[2];
347                                         }
348                                         if(d < dmin)
349                                         {
350                                                 dmin = d;
351                                                 c[0] = c[2];
352                                         }
353
354                                         if(dxt == DXT5)
355                                         {
356                                                 ca[2]  = rgba[(x + y * iw) * 4 + 3];
357                                                 if(ca[2] > ca[1])
358                                                         ca[1] = ca[2];
359                                                 if(ca[2] < ca[0])
360                                                         ca[0] = ca[2];
361                                         }
362                                 }
363
364                         m = n = 2;
365                 }
366                 else
367                 {
368                         for(x = 0; x < w; ++x)
369                                 for(y = 0; y < h; ++y)
370                                 {
371                                         c[n].r = rgba[(x + y * iw) * 4 + 2];
372                                         c[n].g = rgba[(x + y * iw) * 4 + 1];
373                                         c[n].b = rgba[(x + y * iw) * 4 + 0];
374                                         if(dxt == DXT5)
375                                                 ca[n]  = rgba[(x + y * iw) * 4 + 3];
376                                         ++n;
377                                 }
378                         m = n;
379
380                         if(mode == MODE_RANDOM)
381                         {
382                                 color_t mins = c[0];
383                                 color_t maxs = c[0];
384                                 for(x = 1; x < n; ++x)
385                                 {
386                                         mins.r = min(mins.r, c[x].r);
387                                         mins.g = min(mins.g, c[x].g);
388                                         mins.b = min(mins.b, c[x].b);
389                                         maxs.r = max(maxs.r, c[x].r);
390                                         maxs.g = max(maxs.g, c[x].g);
391                                         maxs.b = max(maxs.b, c[x].b);
392                                 }
393                                 color_t len = { maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1 };
394                                 for(x = 0; x < nrandom; ++x)
395                                 {
396                                         c[m].r = mins.r + rand() % len.r;
397                                         c[m].g = mins.g + rand() % len.g;
398                                         c[m].b = mins.b + rand() % len.b;
399                                         ++m;
400                                 }
401                         }
402                         else
403                         {
404                                 // hack for last miplevel
405                                 if(n == 1)
406                                 {
407                                         c[1] = c[0];
408                                         m = n = 2;
409                                 }
410                         }
411
412                         reduce_colors_inplace(c, n, m, ColorDist);
413                         if(dxt == DXT5)
414                                 reduce_colors_inplace_2fixpoints(ca, n, n, alpha_dist, (unsigned char) 0, (unsigned char) 255);
415                 }
416
417                 if(!refine)
418                 {
419                         if(dxt == DXT5)
420                         {
421                                 if(ca[1] < ca[0])
422                                 {
423                                         ca[2] = ca[0];
424                                         ca[0] = ca[1];
425                                         ca[1] = ca[2];
426                                 }
427                         }
428                         if(c[1] < c[0])
429                         {
430                                 c[2] = c[0];
431                                 c[0] = c[1];
432                                 c[1] = c[2];
433                         }
434                 }
435
436                 int nc0 = 0, na0 = 0, sc0r = 0, sc0g = 0, sc0b = 0, sa0 = 0;
437                 int nc1 = 0, na1 = 0, sc1r = 0, sc1g = 0, sc1b = 0, sa1 = 0;
438
439                 memset(out, 0, (dxt == DXT1) ? 8 : 16);
440                 for(x = 0; x < w; ++x)
441                         for(y = 0; y < h; ++y)
442                         {
443                                 int pindex = (x+y*4);
444                                 c[2].r = rgba[(x + y * iw) * 4 + 2];
445                                 c[2].g = rgba[(x + y * iw) * 4 + 1];
446                                 c[2].b = rgba[(x + y * iw) * 4 + 0];
447                                 ca[2]  = rgba[(x + y * iw) * 4 + 3];
448                                 switch(dxt)
449                                 {
450                                         case DXT5:
451                                                 {
452                                                         int da[4];
453                                                         int bitindex = pindex * 3;
454                                                         da[0] = alpha_dist(ca[0], ca[2]);
455                                                         da[1] = alpha_dist(ca[1], ca[2]);
456                                                         da[2] = alpha_dist(0, ca[2]);
457                                                         da[3] = alpha_dist(255, ca[2]);
458                                                         if(da[2] <= da[0] && da[2] <= da[1] && da[2] <= da[3])
459                                                         {
460                                                                 // 6
461                                                                 ++bitindex;
462                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
463                                                                 ++bitindex;
464                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
465                                                         }
466                                                         else if(da[3] <= da[0] && da[3] <= da[1])
467                                                         {
468                                                                 // 7
469                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
470                                                                 ++bitindex;
471                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
472                                                                 ++bitindex;
473                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
474                                                         }
475                                                         else if(da[0] <= da[1])
476                                                         {
477                                                                 // 0
478                                                                 if(refine)
479                                                                 {
480                                                                         ++na0;
481                                                                         sa0 += ca[2];
482                                                                 }
483                                                         }
484                                                         else
485                                                         {
486                                                                 // 1
487                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
488                                                                 if(refine)
489                                                                 {
490                                                                         ++na1;
491                                                                         sa1 += ca[2];
492                                                                 }
493                                                         }
494                                                 }
495                                                 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
496                                                 {
497                                                         int bitindex = pindex * 2;
498                                                         out[bitindex / 8 + 12] |= (1 << (bitindex % 8));
499                                                         if(refine)
500                                                         {
501                                                                 ++nc1;
502                                                                 sc1r += c[2].r;
503                                                                 sc1g += c[2].g;
504                                                                 sc1b += c[2].b;
505                                                         }
506                                                 }
507                                                 else
508                                                 {
509                                                         if(refine)
510                                                         {
511                                                                 ++nc0;
512                                                                 sc0r += c[2].r;
513                                                                 sc0g += c[2].g;
514                                                                 sc0b += c[2].b;
515                                                         }
516                                                 }
517                                                 break;
518                                         case DXT3:
519                                                 {
520                                                         int bitindex = pindex * 4;
521                                                         out[bitindex / 8 + 0] |= (ca[2] << (bitindex % 8));
522                                                 }
523                                                 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
524                                                 {
525                                                         int bitindex = pindex * 2;
526                                                         out[bitindex / 8 + 12] |= (1 << (bitindex % 8));
527                                                         if(refine)
528                                                         {
529                                                                 ++nc1;
530                                                                 sc1r += c[2].r;
531                                                                 sc1g += c[2].g;
532                                                                 sc1b += c[2].b;
533                                                         }
534                                                 }
535                                                 else
536                                                 {
537                                                         if(refine)
538                                                         {
539                                                                 ++nc0;
540                                                                 sc0r += c[2].r;
541                                                                 sc0g += c[2].g;
542                                                                 sc0b += c[2].b;
543                                                         }
544                                                 }
545                                                 break;
546                                         case DXT1:
547                                                 {
548                                                         int bitindex = pindex * 2;
549                                                         if(!ca[2])
550                                                                 out[bitindex / 8 + 4] |= (3 << (bitindex % 8));
551                                                         else if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
552                                                         {
553                                                                 out[bitindex / 8 + 4] |= (1 << (bitindex % 8));
554                                                                 if(refine)
555                                                                 {
556                                                                         ++nc1;
557                                                                         sc1r += c[2].r;
558                                                                         sc1g += c[2].g;
559                                                                         sc1b += c[2].b;
560                                                                 }
561                                                         }
562                                                         else
563                                                         {
564                                                                 if(refine)
565                                                                 {
566                                                                         ++nc0;
567                                                                         sc0r += c[2].r;
568                                                                         sc0g += c[2].g;
569                                                                         sc0b += c[2].b;
570                                                                 }
571                                                         }
572                                                 }
573                                                 break;
574                                 }
575                         }
576                 if(refine)
577                 {
578                         if(dxt == DXT5)
579                         {
580                                 if(na0)
581                                         ca[0] = (2 * sa0 + na0) / (2 * na0);
582                                 if(na1)
583                                         ca[1] = (2 * sa1 + na1) / (2 * na1);
584                         }
585                         if(nc0)
586                         {
587                                 c[0].r = (2 * sc0r + nc0) / (2 * nc0);
588                                 c[0].g = (2 * sc0g + nc0) / (2 * nc0);
589                                 c[0].b = (2 * sc0b + nc0) / (2 * nc0);
590                         }
591                         if(nc1)
592                         {
593                                 c[1].r = (2 * sc1r + nc1) / (2 * nc1);
594                                 c[1].g = (2 * sc1g + nc1) / (2 * nc1);
595                                 c[1].b = (2 * sc1b + nc1) / (2 * nc1);
596                         }
597
598                         if(dxt == DXT5)
599                         {
600                                 if(ca[1] < ca[0])
601                                 {
602                                         ca[2] = ca[0];
603                                         ca[0] = ca[1];
604                                         ca[1] = ca[2];
605                                         // swap the alphas
606                                         for(int pindex = 0; pindex < 16; ++pindex)
607                                         {
608                                                 int bitindex_set = pindex * 3;
609                                                 int bitindex_test = bitindex_set + 3;
610                                                 if(!(out[bitindex_test / 8] & (1 << (bitindex_test % 8))))
611                                                         out[bitindex_set / 8] ^= (1 << (bitindex_set % 8));
612                                         }
613                                 }
614                         }
615                         if(c[1] < c[0])
616                         {
617                                 c[2] = c[0];
618                                 c[0] = c[1];
619                                 c[1] = c[2];
620                                 // swap the colors
621                                 if(dxt == DXT1)
622                                 {
623                                         out[4] ^= 0x55 & ~(out[4] >> 1);
624                                         out[5] ^= 0x55 & ~(out[5] >> 1);
625                                         out[6] ^= 0x55 & ~(out[6] >> 1);
626                                         out[7] ^= 0x55 & ~(out[7] >> 1);
627                                 }
628                                 else
629                                 {
630                                         out[12] ^= 0x55 & ~(out[12] >> 1);
631                                         out[13] ^= 0x55 & ~(out[13] >> 1);
632                                         out[14] ^= 0x55 & ~(out[14] >> 1);
633                                         out[15] ^= 0x55 & ~(out[15] >> 1);
634                                 }
635                         }
636                 }
637                 switch(dxt)
638                 {
639                         case DXT5:
640                                 out[0] = ca[0];
641                                 out[1] = ca[1];
642                         case DXT3:
643                                 out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
644                                 out[9] = (c[0].r << 3) | (c[0].g >> 3);
645                                 out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
646                                 out[11] = (c[1].r << 3) | (c[1].g >> 3);
647                                 break;
648                         case DXT1:
649                                 out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
650                                 out[1] = (c[0].r << 3) | (c[0].g >> 3);
651                                 out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
652                                 out[3] = (c[1].r << 3) | (c[1].g >> 3);
653                                 break;
654                 }
655         }
656
657         // compile time dispatch magic
658         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
659         inline s2tc_encode_block_func_t s2tc_encode_block_func(bool refine)
660         {
661                 if(refine)
662                         return s2tc_encode_block<dxt, ColorDist, mode, true>;
663                 else
664                         return s2tc_encode_block<dxt, ColorDist, mode, false>;
665         }
666
667         template<DxtMode dxt, ColorDistFunc ColorDist>
668         inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, bool refine)
669         {
670                 if(nrandom > 0)
671                         return s2tc_encode_block_func<dxt, ColorDist, MODE_RANDOM>(refine);
672                 else if(nrandom == 0)
673                         return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
674                 else
675                         return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
676         }
677
678         template<ColorDistFunc ColorDist>
679         inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, bool refine)
680         {
681                 switch(dxt)
682                 {
683                         case DXT1:
684                                 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
685                                 break;
686                         case DXT3:
687                                 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
688                                 break;
689                         default:
690                         case DXT5:
691                                 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
692                                 break;
693                 }
694         }
695 };
696
697 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, bool refine)
698 {
699         switch(cd)
700         {
701                 case RGB:
702                         return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
703                         break;
704                 case YUV:
705                         return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
706                         break;
707                 case SRGB:
708                         return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
709                         break;
710                 case SRGB_MIXED:
711                         return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
712                         break;
713                 case LAB:
714                         return s2tc_encode_block_func<color_dist_lab_srgb>(dxt, nrandom, refine);
715                         break;
716                 case AVG:
717                         return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
718                         break;
719                 default:
720                 case WAVG:
721                         return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
722                         break;
723                 case NORMALMAP:
724                         return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
725                         break;
726         }
727 }
728
namespace
{
        // Quantizes one channel sample to (8 - shift) bits using error
        // diffusion.
        //
        //   diff   in/out: error carried over from the previous sample; it is
        //          added to src before quantizing and then overwritten with
        //          the error left by this sample
        //   src    source sample (callers in this file pass 8-bit values)
        //   shift  number of low bits to drop, expected range 0..8
        //
        // Returns the quantized value, clamped to 0 .. 2^(8 - shift) - 1.
        inline int diffuse(int *diff, int src, int shift)
        {
                const int bits = 8 - shift;
                if(bits <= 0)
                {
                        // No bits left to store anything: the only possible
                        // output is 0. Drop the carried error instead of
                        // letting it accumulate without bound.
                        *diff = 0;
                        return 0;
                }

                const int maxval = (1 << bits) - 1;
                src += *diff;

                // clamp the quantized value into the representable range
                int ret = src >> shift;
                if(ret > maxval)
                        ret = maxval;
                if(ret < 0)
                        ret = 0;

                // Simulate decoding ("loop filter"): expand ret back to 8 bits
                // by repeating its bit pattern from the top bit downwards.
                // For shift <= 4 this equals
                //   (ret << shift) | (ret >> (8 - 2 * shift))
                // but it also stays well defined for narrower outputs, where
                // that expression would shift by a negative amount.
                int loop = 0;
                for(int pos = shift; pos > -bits; pos -= bits)
                        loop |= (pos >= 0) ? (ret << pos) : (ret >> -pos);

                *diff = src - loop;
                return ret;
        }
}
742
743 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int bgr, int alphabits)
744 {
745         int x, y;
746         int diffuse_r = 0;
747         int diffuse_g = 0;
748         int diffuse_b = 0;
749         int diffuse_a = 0;
750         if(bgr)
751         {
752                 for(y = 0; y < h; ++y)
753                         for(x = 0; x < w; ++x)
754                         {
755                                 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 2], 3);
756                                 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
757                                 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 0], 3);
758                         }
759         }
760         else
761         {
762                 for(y = 0; y < h; ++y)
763                         for(x = 0; x < w; ++x)
764                         {
765                                 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
766                                 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
767                                 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
768                         }
769         }
770         if(srccomps == 4)
771         {
772                 int alphadiffuse = 8 - alphabits;
773                 for(y = 0; y < h; ++y)
774                         for(x = 0; x < w; ++x)
775                                 out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], alphadiffuse);
776         }
777         else
778         {
779                 int alpharange = (1 << alphabits) - 1;
780                 for(y = 0; y < h; ++y)
781                         for(x = 0; x < w; ++x)
782                                 out[(x + y * w) * 4 + 3] = alpharange;
783         }
784 }
785