OSDN Git Service

REFINE_LOOP: minor improvement
[android-x86/external-s2tc.git] / s2tc_compressor.cpp
1 #include <math.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <stdio.h>
5
6 #include "s2tc_compressor.h"
7 #include "s2tc_common.h"
8
9 namespace
10 {
        // One color with three signed 8-bit channels.
        // The distance functions in this file annotate r and b with a multiplier
        // of 31 and g with a multiplier of 63, i.e. presumably 5/6/5-bit channel
        // values -- confirm against the code that fills in the pixels.
        typedef struct
        {
                signed char r, g, b;
        }
        color_t;
16
17         inline bool operator<(const color_t &a, const color_t &b)
18         {
19                 signed char d;
20                 d = a.r - b.r;
21                 if(d)
22                         return d < 0;
23                 d = a.g - b.g;
24                 if(d)
25                         return d < 0;
26                 d = a.b - b.b;
27                 return d < 0;
28         }
29         // 16 differences must fit in int
30         // i.e. a difference must be lower than 2^27
31
        // shift right, rounded: adds half of the divisor, 1 << (n-1), to a and
        // then shifts the sum right by n bits.
        // n must be at least 1 because (n)-1 is used as a shift count.
        // The argument n appears three times in the expansion, so it should be
        // free of side effects.
#define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
34
35         inline int color_dist_avg(const color_t &a, const color_t &b)
36         {
37                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
38                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
39                 int db = a.b - b.b; // multiplier: 31 (-1..1)
40                 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
41         }
42
43         inline int color_dist_wavg(const color_t &a, const color_t &b)
44         {
45                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
46                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
47                 int db = a.b - b.b; // multiplier: 31 (-1..1)
48                 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
49                 // weighted 4:16:1
50         }
51
52         inline int color_dist_yuv(const color_t &a, const color_t &b)
53         {
54                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
55                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
56                 int db = a.b - b.b; // multiplier: 31 (-1..1)
57                 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
58                 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
59                 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
60                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
61                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
62                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
63         }
64
65         inline int color_dist_rgb(const color_t &a, const color_t &b)
66         {
67                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
68                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
69                 int db = a.b - b.b; // multiplier: 31 (-1..1)
70                 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
71                 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
72                 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
73                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
74                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
75                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
76         }
77
78         inline int color_dist_srgb(const color_t &a, const color_t &b)
79         {
80                 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
81                 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
82                 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
83                 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
84                 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
85                 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
86                 int sy = SHRR(y, 3) * SHRR(y, 4);
87                 int su = SHRR(u, 3) * SHRR(u, 4);
88                 int sv = SHRR(v, 3) * SHRR(v, 4);
89                 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
90                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
91                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
92         }
93
94         inline int srgb_get_y(const color_t &a)
95         {
96                 // convert to linear
97                 int r = a.r * (int) a.r;
98                 int g = a.g * (int) a.g;
99                 int b = a.b * (int) a.b;
100                 // find luminance
101                 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
102                 // square root it (!)
103                 y = sqrtf(y) + 0.5f; // now in range 0 to 3815
104                 return y;
105         }
106
107         inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
108         {
109                 // get Y
110                 int ay = srgb_get_y(a);
111                 int by = srgb_get_y(b);
112                 // get UV
113                 int au = a.r * 191 - ay;
114                 int av = a.b * 191 - ay;
115                 int bu = b.r * 191 - by;
116                 int bv = b.b * 191 - by;
117                 // get differences
118                 int y = ay - by;
119                 int u = au - bu;
120                 int v = av - bv;
121                 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
122                 // weight for u: ???
123                 // weight for v: ???
124         }
125
        // FIXME this is likely broken
        // Color distance computed in float: each channel is scaled to 0..1 and
        // raised to the power 2.4, the result is mapped through a 3x3 matrix to
        // X/Y/Z and then to L/A/B values via cube roots. Returns 1000 times the
        // sum of the squared L, A and B differences, converted to int.
        inline int color_dist_lab_srgb(const color_t &a, const color_t &b)
        {
                // undo sRGB
                // (a plain 2.4 power of the channel divided by its 31 / 63 maximum)
                float ar = powf(a.r / 31.0f, 2.4f);
                float ag = powf(a.g / 63.0f, 2.4f);
                float ab = powf(a.b / 31.0f, 2.4f);
                float br = powf(b.r / 31.0f, 2.4f);
                float bg = powf(b.g / 63.0f, 2.4f);
                float bb = powf(b.b / 31.0f, 2.4f);
                // convert to CIE XYZ
                float aX = 0.4124f * ar + 0.3576f * ag + 0.1805f * ab;
                float aY = 0.2126f * ar + 0.7152f * ag + 0.0722f * ab;
                float aZ = 0.0193f * ar + 0.1192f * ag + 0.9505f * ab;
                float bX = 0.4124f * br + 0.3576f * bg + 0.1805f * bb;
                float bY = 0.2126f * br + 0.7152f * bg + 0.0722f * bb;
                float bZ = 0.0193f * br + 0.1192f * bg + 0.9505f * bb;
                // convert to CIE Lab
                // NOTE(review): Xn/Yn/Zn sum to 1, so they look like chromaticity
                // coordinates rather than white-point tristimulus values -- confirm
                // whether this is what the FIXME above refers to.
                float Xn = 0.3127f;
                float Yn = 0.3290f;
                float Zn = 0.3583f;
                float aL = 116 * cbrtf(aY / Yn) - 16;
                float aA = 500 * (cbrtf(aX / Xn) - cbrtf(aY / Yn));
                float aB = 200 * (cbrtf(aY / Yn) - cbrtf(aZ / Zn));
                float bL = 116 * cbrtf(bY / Yn) - 16;
                float bA = 500 * (cbrtf(bX / Xn) - cbrtf(bY / Yn));
                float bB = 200 * (cbrtf(bY / Yn) - cbrtf(bZ / Zn));
                // euclidean distance, but moving weight away from A and B
                // NOTE(review): the expression below sums the three squared
                // differences with equal weight; no extra weighting is applied here.
                // The float product is converted to int on return.
                return 1000 * ((aL - bL) * (aL - bL) + (aA - bA) * (aA - bA) + (aB - bB) * (aB - bB));
        }
156
        // Distance for normal maps: both colors are mapped to vectors with
        // components in -1..1, each vector is normalized to unit length (unless
        // its length is zero), and 100000 times the squared euclidean distance
        // between the two vectors is returned, converted to int.
        inline int color_dist_normalmap(const color_t &a, const color_t &b)
        {
                float ca[3], cb[3], n;
                // map channel range 0..31 / 0..63 to -1..1
                ca[0] = a.r / 31.0f * 2 - 1;
                ca[1] = a.g / 63.0f * 2 - 1;
                ca[2] = a.b / 31.0f * 2 - 1;
                cb[0] = b.r / 31.0f * 2 - 1;
                cb[1] = b.g / 63.0f * 2 - 1;
                cb[2] = b.b / 31.0f * 2 - 1;
                // normalize a; skipped for a zero vector to avoid dividing by zero
                n = ca[0] * ca[0] + ca[1] * ca[1] + ca[2] * ca[2];
                if(n > 0)
                {
                        n = 1.0f / sqrtf(n);
                        ca[0] *= n;
                        ca[1] *= n;
                        ca[2] *= n;
                }
                // normalize b the same way
                n = cb[0] * cb[0] + cb[1] * cb[1] + cb[2] * cb[2];
                if(n > 0)
                {
                        n = 1.0f / sqrtf(n);
                        cb[0] *= n;
                        cb[1] *= n;
                        cb[2] *= n;
                }

                return
                        100000 *
                        (
                                (cb[0] - ca[0]) * (cb[0] - ca[0])
                                +
                                (cb[1] - ca[1]) * (cb[1] - ca[1])
                                +
                                (cb[2] - ca[2]) * (cb[2] - ca[2])
                        )
                        ;
                // max value: 100000 * 4 = 400000
                // (two unit vectors are at most 2 apart, so the squared distance is at most 4)
        }
195
        // Signature shared by the color_dist_* functions above. It is used as a
        // non-type template parameter to select the distance function at
        // compile time.
        typedef int ColorDistFunc(const color_t &a, const color_t &b);
197
198         inline int alpha_dist(unsigned char a, unsigned char b)
199         {
200                 return (a - (int) b) * (a - (int) b);
201         }
202
203         template <class T, class F>
204         // n: input count
205         // m: total color count (including non-counted inputs)
206         // m >= n
207         inline void reduce_colors_inplace(T *c, int n, int m, F dist)
208         {
209                 int i, j, k;
210                 int bestsum = -1;
211                 int besti = 0;
212                 int bestj = 1;
213                 int dists[m][n];
214                 // first the square
215                 for(i = 0; i < n; ++i)
216                 {
217                         dists[i][i] = 0;
218                         for(j = i+1; j < n; ++j)
219                         {
220                                 int d = dist(c[i], c[j]);
221                                 dists[i][j] = dists[j][i] = d;
222                         }
223                 }
224                 // then the box
225                 for(; i < m; ++i)
226                 {
227                         for(j = 0; j < n; ++j)
228                         {
229                                 int d = dist(c[i], c[j]);
230                                 dists[i][j] = d;
231                         }
232                 }
233                 for(i = 0; i < m; ++i)
234                         for(j = i+1; j < m; ++j)
235                         {
236                                 int sum = 0;
237                                 for(k = 0; k < n; ++k)
238                                 {
239                                         int di = dists[i][k];
240                                         int dj = dists[j][k];
241                                         int m  = min(di, dj);
242                                         sum += m;
243                                 }
244                                 if(bestsum < 0 || sum < bestsum)
245                                 {
246                                         bestsum = sum;
247                                         besti = i;
248                                         bestj = j;
249                                 }
250                         }
251                 if(besti != 0)
252                         c[0] = c[besti];
253                 if(bestj != 1)
254                         c[1] = c[bestj];
255         }
256         template <class T, class F>
257         inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
258         {
259                 int i, j, k;
260                 int bestsum = -1;
261                 int besti = 0;
262                 int bestj = 1;
263                 int dists[m+2][n];
264                 // first the square
265                 for(i = 0; i < n; ++i)
266                 {
267                         dists[i][i] = 0;
268                         for(j = i+1; j < n; ++j)
269                         {
270                                 int d = dist(c[i], c[j]);
271                                 dists[i][j] = dists[j][i] = d;
272                         }
273                 }
274                 // then the box
275                 for(; i < m; ++i)
276                 {
277                         for(j = 0; j < n; ++j)
278                         {
279                                 int d = dist(c[i], c[j]);
280                                 dists[i][j] = d;
281                         }
282                 }
283                 // then the two extra rows
284                 for(j = 0; j < n; ++j)
285                 {
286                         int d = dist(fix0, c[j]);
287                         dists[m][j] = d;
288                 }
289                 for(j = 0; j < n; ++j)
290                 {
291                         int d = dist(fix1, c[j]);
292                         dists[m+1][j] = d;
293                 }
294                 for(i = 0; i < m; ++i)
295                         for(j = i+1; j < m; ++j)
296                         {
297                                 int sum = 0;
298                                 for(k = 0; k < n; ++k)
299                                 {
300                                         int di = dists[i][k];
301                                         int dj = dists[j][k];
302                                         int d0 = dists[m][k];
303                                         int d1 = dists[m+1][k];
304                                         int m  = min(min(di, dj), min(d0, d1));
305                                         sum += m;
306                                 }
307                                 if(bestsum < 0 || sum < bestsum)
308                                 {
309                                         bestsum = sum;
310                                         besti = i;
311                                         bestj = j;
312                                 }
313                         }
314                 if(besti != 0)
315                         c[0] = c[besti];
316                 if(bestj != 1)
317                         c[1] = c[bestj];
318         }
319
        // How s2tc_encode_block chooses the two base colors of a block.
        enum CompressionMode
        {
                // collect the block's pixels and reduce them to two with
                // reduce_colors_inplace
                MODE_NORMAL,
                // as MODE_NORMAL, but nrandom extra candidates drawn with rand()
                // from the per-channel min..max range of the block are added
                MODE_RANDOM,
                // take the pixels with the smallest and the largest ColorDist to
                // black; no reduce step is run
                MODE_FAST
        };
326
        // Maps a channel value into the domain in which s2tc_encode_block sums
        // it up during refinement. The default is the identity.
        template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
        {
                return comp;
        }
        // The sRGB-style distance functions accumulate squared channel values.
        template<> inline int refine_component_encode<color_dist_srgb>(int comp)
        {
                return comp * comp;
        }
        template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
        {
                return comp * comp;
        }
        template<> inline int refine_component_encode<color_dist_lab_srgb>(int comp)
        {
                return comp * comp;
        }
343
        // Counterpart of refine_component_encode: maps a value from the encoded
        // domain back to a channel value. The default is the identity.
        template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
        {
                return comp;
        }
        // For the distance functions whose encode step squares the value, take
        // the square root, rounded to nearest by adding 0.5 before the
        // conversion to int.
        template<> inline int refine_component_decode<color_dist_srgb>(int comp)
        {
                return sqrtf(comp) + 0.5f;
        }
        template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
        {
                return sqrtf(comp) + 0.5f;
        }
        template<> inline int refine_component_decode<color_dist_lab_srgb>(int comp)
        {
                return sqrtf(comp) + 0.5f;
        }
360
        // these color dist functions ignore color values at alpha 0
        // Compile-time flag per distance function. When value is true,
        // s2tc_encode_block skips pixels whose alpha is 0 while collecting the
        // block's colors and leaves them out of the refinement sums.
        template<ColorDistFunc ColorDist> struct alpha_0_is_unimportant
        {
                static bool const value = true;
        };
        // normal maps: the color of pixels with alpha 0 is still taken into account
        template<> struct alpha_0_is_unimportant<color_dist_normalmap>
        {
                static bool const value = false;
        };
370
371         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
372         inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
373         {
374                 color_t c[16 + (mode == MODE_RANDOM ? nrandom : 0)];
375                 unsigned char ca[16 + (mode == MODE_RANDOM ? nrandom : 0)];
376                 int n = 0, m = 0;
377                 int x, y;
378
379                 if(mode == MODE_FAST)
380                 {
381                         // FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these
382
383                         color_t c0 = {0, 0, 0};
384
385                         // dummy values because we don't know whether the first pixel will write
386                         c[0].r = 31;
387                         c[0].g = 63;
388                         c[0].b = 31;
389                         c[1].r = 0;
390                         c[1].g = 0;
391                         c[1].b = 0;
392                         int dmin = 0x7FFFFFFF;
393                         int dmax = 0;
394                         if(dxt == DXT5)
395                         {
396                                 ca[0] = rgba[3];
397                                 ca[1] = ca[0];
398                         }
399
400                         for(x = 0; x < w; ++x)
401                                 for(y = 0; y < h; ++y)
402                                 {
403                                         c[2].r = rgba[(x + y * iw) * 4 + 2];
404                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
405                                         c[2].b = rgba[(x + y * iw) * 4 + 0];
406                                         ca[2]  = rgba[(x + y * iw) * 4 + 3];
407                                         // MODE_FAST doesn't work for normalmaps, so this works
408                                         if(!ca[2])
409                                                 continue;
410
411                                         int d = ColorDist(c[2], c0);
412                                         if(d > dmax)
413                                         {
414                                                 dmax = d;
415                                                 c[1] = c[2];
416                                         }
417                                         if(d < dmin)
418                                         {
419                                                 dmin = d;
420                                                 c[0] = c[2];
421                                         }
422
423                                         if(dxt == DXT5)
424                                         {
425                                                 if(ca[2] != 255)
426                                                 {
427                                                         if(ca[2] > ca[1])
428                                                                 ca[1] = ca[2];
429                                                         if(ca[2] < ca[0])
430                                                                 ca[0] = ca[2];
431                                                 }
432                                         }
433                                 }
434
435                         // if ALL pixels were transparent, this won't stop us
436
437                         m = n = 2;
438                 }
439                 else
440                 {
441                         for(x = 0; x < w; ++x)
442                                 for(y = 0; y < h; ++y)
443                                 {
444                                         ca[n]  = rgba[(x + y * iw) * 4 + 3];
445                                         if(alpha_0_is_unimportant<ColorDist>::value)
446                                                 if(!ca[n])
447                                                         continue;
448                                         c[n].r = rgba[(x + y * iw) * 4 + 2];
449                                         c[n].g = rgba[(x + y * iw) * 4 + 1];
450                                         c[n].b = rgba[(x + y * iw) * 4 + 0];
451                                         ++n;
452                                 }
453                         if(n == 0)
454                         {
455                                 n = 1;
456                                 c[0].r = 0;
457                                 c[0].g = 0;
458                                 c[0].b = 0;
459                                 ca[0] = 0;
460                         }
461                         m = n;
462
463                         if(mode == MODE_RANDOM)
464                         {
465                                 color_t mins = c[0];
466                                 color_t maxs = c[0];
467                                 unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
468                                 unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
469                                 for(x = 1; x < n; ++x)
470                                 {
471                                         mins.r = min(mins.r, c[x].r);
472                                         mins.g = min(mins.g, c[x].g);
473                                         mins.b = min(mins.b, c[x].b);
474                                         maxs.r = max(maxs.r, c[x].r);
475                                         maxs.g = max(maxs.g, c[x].g);
476                                         maxs.b = max(maxs.b, c[x].b);
477                                         if(dxt == DXT5)
478                                         {
479                                                 mina = min(mina, ca[x]);
480                                                 maxa = max(maxa, ca[x]);
481                                         }
482                                 }
483                                 color_t len = { maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1 };
484                                 int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
485                                 for(x = 0; x < nrandom; ++x)
486                                 {
487                                         c[m].r = mins.r + rand() % len.r;
488                                         c[m].g = mins.g + rand() % len.g;
489                                         c[m].b = mins.b + rand() % len.b;
490                                         if(dxt == DXT5)
491                                                 ca[m] = mina + rand() % lena;
492                                         ++m;
493                                 }
494                         }
495                         else
496                         {
497                                 // hack for last miplevel
498                                 if(n == 1)
499                                 {
500                                         c[1] = c[0];
501                                         m = n = 2;
502                                 }
503                         }
504
505                         reduce_colors_inplace(c, n, m, ColorDist);
506                         if(dxt == DXT5)
507                                 reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
508                 }
509
510                 if(refine == REFINE_NEVER)
511                 {
512                         if(dxt == DXT5)
513                         {
514                                 if(ca[1] < ca[0])
515                                 {
516                                         // select mode with 6 = 0, 7 = 255
517                                         ca[2] = ca[0];
518                                         ca[0] = ca[1];
519                                         ca[1] = ca[2];
520                                 }
521                         }
522                         if((dxt == DXT1) ? (c[1] < c[0]) : (c[0] < c[1]))
523                         // DXT1: select mode with 3 = transparent
524                         // other: don't select this mode
525                         {
526                                 c[2] = c[0];
527                                 c[0] = c[1];
528                                 c[1] = c[2];
529                         }
530                 }
531
532                 bool refined;
533                 do
534                 {
535                         int nc0 = 0, na0 = 0, sc0r = 0, sc0g = 0, sc0b = 0, sa0 = 0;
536                         int nc1 = 0, na1 = 0, sc1r = 0, sc1g = 0, sc1b = 0, sa1 = 0;
537                         if(refine == REFINE_LOOP)
538                                 refined = false;
539
540                         memset(out, 0, (dxt == DXT1) ? 8 : 16);
541                         for(x = 0; x < w; ++x)
542                                 for(y = 0; y < h; ++y)
543                                 {
544                                         int pindex = (x+y*4);
545                                         c[2].r = rgba[(x + y * iw) * 4 + 2];
546                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
547                                         c[2].b = rgba[(x + y * iw) * 4 + 0];
548                                         ca[2]  = rgba[(x + y * iw) * 4 + 3];
549                                         switch(dxt)
550                                         {
551                                                 case DXT5:
552                                                         {
553                                                                 bool visible = true;
554                                                                 int da[4];
555                                                                 int bitindex = pindex * 3;
556                                                                 da[0] = alpha_dist(ca[0], ca[2]);
557                                                                 da[1] = alpha_dist(ca[1], ca[2]);
558                                                                 da[2] = alpha_dist(0, ca[2]);
559                                                                 da[3] = alpha_dist(255, ca[2]);
560                                                                 if(da[2] <= da[0] && da[2] <= da[1] && da[2] <= da[3])
561                                                                 {
562                                                                         // 6
563                                                                         ++bitindex;
564                                                                         setbit(&out[2], bitindex);
565                                                                         ++bitindex;
566                                                                         setbit(&out[2], bitindex);
567                                                                         if(alpha_0_is_unimportant<ColorDist>::value)
568                                                                                 visible = false;
569                                                                 }
570                                                                 else if(da[3] <= da[0] && da[3] <= da[1])
571                                                                 {
572                                                                         // 7
573                                                                         setbit(&out[2], bitindex);
574                                                                         ++bitindex;
575                                                                         setbit(&out[2], bitindex);
576                                                                         ++bitindex;
577                                                                         setbit(&out[2], bitindex);
578                                                                 }
579                                                                 else if(da[0] <= da[1])
580                                                                 {
581                                                                         // 0
582                                                                         if(refine != REFINE_NEVER)
583                                                                         {
584                                                                                 ++na0;
585                                                                                 sa0 += ca[2];
586                                                                         }
587                                                                 }
588                                                                 else
589                                                                 {
590                                                                         // 1
591                                                                         setbit(&out[2], bitindex);
592                                                                         if(refine != REFINE_NEVER)
593                                                                         {
594                                                                                 ++na1;
595                                                                                 sa1 += ca[2];
596                                                                         }
597                                                                 }
598                                                                 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
599                                                                 {
600                                                                         int bitindex = pindex * 2;
601                                                                         setbit(&out[12], bitindex);
602                                                                         if(refine != REFINE_NEVER)
603                                                                         {
604                                                                                 if(!alpha_0_is_unimportant<ColorDist>::value || visible)
605                                                                                 {
606                                                                                         ++nc1;
607                                                                                         sc1r += refine_component_encode<ColorDist>(c[2].r);
608                                                                                         sc1g += refine_component_encode<ColorDist>(c[2].g);
609                                                                                         sc1b += refine_component_encode<ColorDist>(c[2].b);
610                                                                                 }
611                                                                         }
612                                                                 }
613                                                                 else
614                                                                 {
615                                                                         if(refine != REFINE_NEVER)
616                                                                         {
617                                                                                 if(!alpha_0_is_unimportant<ColorDist>::value || visible)
618                                                                                 {
619                                                                                         ++nc0;
620                                                                                         sc0r += refine_component_encode<ColorDist>(c[2].r);
621                                                                                         sc0g += refine_component_encode<ColorDist>(c[2].g);
622                                                                                         sc0b += refine_component_encode<ColorDist>(c[2].b);
623                                                                                 }
624                                                                         }
625                                                                 }
626                                                         }
627                                                         break;
628                                                 case DXT3:
629                                                         {
630                                                                 int bitindex = pindex * 4;
631                                                                 setbit(&out[0], bitindex, ca[2]);
632                                                         }
633                                                         if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
634                                                         {
635                                                                 int bitindex = pindex * 2;
636                                                                 setbit(&out[12], bitindex);
637                                                                 if(refine != REFINE_NEVER)
638                                                                 {
639                                                                         if(!alpha_0_is_unimportant<ColorDist>::value || ca[2])
640                                                                         {
641                                                                                 ++nc1;
642                                                                                 sc1r += refine_component_encode<ColorDist>(c[2].r);
643                                                                                 sc1g += refine_component_encode<ColorDist>(c[2].g);
644                                                                                 sc1b += refine_component_encode<ColorDist>(c[2].b);
645                                                                         }
646                                                                 }
647                                                         }
648                                                         else
649                                                         {
650                                                                 if(refine != REFINE_NEVER)
651                                                                 {
652                                                                         if(!alpha_0_is_unimportant<ColorDist>::value || ca[2])
653                                                                         {
654                                                                                 ++nc0;
655                                                                                 sc0r += refine_component_encode<ColorDist>(c[2].r);
656                                                                                 sc0g += refine_component_encode<ColorDist>(c[2].g);
657                                                                                 sc0b += refine_component_encode<ColorDist>(c[2].b);
658                                                                         }
659                                                                 }
660                                                         }
661                                                         break;
662                                                 case DXT1:
663                                                         {
664                                                                 // the normalmap-uses-alpha-0 hack cannot be used here
665                                                                 int bitindex = pindex * 2;
666                                                                 if(!ca[2])
667                                                                         setbit(&out[4], bitindex, 3);
668                                                                 else if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
669                                                                 {
670                                                                         setbit(&out[4], bitindex);
671                                                                         if(refine != REFINE_NEVER)
672                                                                         {
673                                                                                 ++nc1;
674                                                                                 sc1r += refine_component_encode<ColorDist>(c[2].r);
675                                                                                 sc1g += refine_component_encode<ColorDist>(c[2].g);
676                                                                                 sc1b += refine_component_encode<ColorDist>(c[2].b);
677                                                                         }
678                                                                 }
679                                                                 else
680                                                                 {
681                                                                         if(refine != REFINE_NEVER)
682                                                                         {
683                                                                                 ++nc0;
684                                                                                 sc0r += refine_component_encode<ColorDist>(c[2].r);
685                                                                                 sc0g += refine_component_encode<ColorDist>(c[2].g);
686                                                                                 sc0b += refine_component_encode<ColorDist>(c[2].b);
687                                                                         }
688                                                                 }
689                                                         }
690                                                         break;
691                                         }
692                                 }
693                         if(refine != REFINE_NEVER)
694                         {
695                                 // REFINEMENT: trick from libtxc_dxtn: reassign the colors to an average of the colors encoded with that value
696
697                                 if(dxt == DXT5)
698                                 {
699                                         if(na0)
700                                                 ca[0] = (2 * sa0 + na0) / (2 * na0);
701                                         if(na1)
702                                                 ca[1] = (2 * sa1 + na1) / (2 * na1);
703                                 }
704                                 if(refine == REFINE_CHECK || refine == REFINE_LOOP)
705                                 {
706                                         c[2] = c[0];
707                                         c[3] = c[1];
708                                 }
709                                 if(nc0)
710                                 {
711                                         c[0].r = refine_component_decode<ColorDist>((2 * sc0r + nc0) / (2 * nc0));
712                                         c[0].g = refine_component_decode<ColorDist>((2 * sc0g + nc0) / (2 * nc0));
713                                         c[0].b = refine_component_decode<ColorDist>((2 * sc0b + nc0) / (2 * nc0));
714                                 }
715                                 if(nc1)
716                                 {
717                                         c[1].r = refine_component_decode<ColorDist>((2 * sc1r + nc1) / (2 * nc1));
718                                         c[1].g = refine_component_decode<ColorDist>((2 * sc1g + nc1) / (2 * nc1));
719                                         c[1].b = refine_component_decode<ColorDist>((2 * sc1b + nc1) / (2 * nc1));
720                                 }
721
722                                 if(refine == REFINE_CHECK || refine == REFINE_LOOP)
723                                 {
724                                         int score_01 = 0;
725                                         int score_23 = 0;
726                                         for(x = 0; x < w; ++x)
727                                                 for(y = 0; y < h; ++y)
728                                                 {
729                                                         int pindex = (x+y*4);
730                                                         c[4].r = rgba[(x + y * iw) * 4 + 2];
731                                                         c[4].g = rgba[(x + y * iw) * 4 + 1];
732                                                         c[4].b = rgba[(x + y * iw) * 4 + 0];
733                                                         if(!alpha_0_is_unimportant<ColorDist>::value)
734                                                         {
735                                                                 if(dxt == DXT5)
736                                                                 {
737                                                                         // check ENCODED alpha
738                                                                         int bitindex_0 = pindex * 3;
739                                                                         int bitindex_1 = bitindex_0 + 2;
740                                                                         if(!testbit(&out[2], bitindex_0))
741                                                                                 if(testbit(&out[2], bitindex_1))
742                                                                                         continue;
743                                                                 }
744                                                                 else
745                                                                 {
746                                                                         // check ORIGINAL alpha (DXT1 and DXT3 preserve it)
747                                                                         ca[4] = rgba[(x + y * iw) * 4 + 3];
748                                                                         if(!ca[4])
749                                                                                 continue;
750                                                                 }
751                                                         }
752                                                         int bitindex = pindex * 2;
753                                                         if(refine == REFINE_CHECK)
754                                                         {
755                                                                 if(testbit(&out[(dxt == DXT1 ? 4 : 12)], bitindex))
756                                                                 {
757                                                                         // we picked an 1
758                                                                         score_01 += ColorDist(c[1], c[4]);
759                                                                         score_23 += ColorDist(c[3], c[4]);
760                                                                 }
761                                                                 else
762                                                                 {
763                                                                         // we picked a 0
764                                                                         score_01 += ColorDist(c[0], c[4]);
765                                                                         score_23 += ColorDist(c[2], c[4]);
766                                                                 }
767                                                         }
768                                                         else if(refine == REFINE_LOOP)
769                                                         {
770                                                                 if(testbit(&out[(dxt == DXT1 ? 4 : 12)], bitindex))
771                                                                 {
772                                                                         // we picked an 1
773                                                                         score_23 += ColorDist(c[3], c[4]);
774                                                                 }
775                                                                 else
776                                                                 {
777                                                                         // we picked a 0
778                                                                         score_23 += ColorDist(c[2], c[4]);
779                                                                 }
780                                                                 // we WILL run another loop iteration, if score_01 wins
781                                                                 score_01 += min(ColorDist(c[0], c[4]), ColorDist(c[1], c[4]));
782                                                         }
783                                                 }
784
785                                         if(score_23 <= score_01)
786                                         {
787                                                 // refinement was BAD
788                                                 c[0] = c[2];
789                                                 c[1] = c[3];
790                                         }
791                                         else if(refine == REFINE_LOOP)
792                                                 refined = true;
793
794                                         // alpha refinement is always good and doesn't
795                                         // need to be checked because alpha is linear
796
797                                         // when looping, though, checking the
798                                         // alpha COULD help, but we usually
799                                         // loop twice anyway as refinement
800                                         // usually helps
801                                 }
802                         }
803                 }
804                 while(refine == REFINE_LOOP && refined);
805
806                 if(refine != REFINE_NEVER)
807                 {
808                         if(dxt == DXT5)
809                         {
810                                 if(ca[1] < ca[0])
811                                 {
812                                         ca[2] = ca[0];
813                                         ca[0] = ca[1];
814                                         ca[1] = ca[2];
815                                         // swap the alphas
816                                         for(int pindex = 0; pindex < 16; ++pindex)
817                                         {
818                                                 int bitindex_set = pindex * 3;
819                                                 int bitindex_test = bitindex_set + 2;
820                                                 if(!testbit(&out[2], bitindex_test))
821                                                         xorbit(&out[2], bitindex_set);
822                                         }
823                                 }
824                         }
825                         if((dxt == DXT1) ? (c[1] < c[0]) : (c[0] < c[1]))
826                         // DXT1: select mode with 3 = transparent
827                         // other: don't select this mode
828                         {
829                                 c[2] = c[0];
830                                 c[0] = c[1];
831                                 c[1] = c[2];
832                                 // swap the colors
833                                 if(dxt == DXT1)
834                                 {
835                                         out[4] ^= 0x55 & ~(out[4] >> 1);
836                                         out[5] ^= 0x55 & ~(out[5] >> 1);
837                                         out[6] ^= 0x55 & ~(out[6] >> 1);
838                                         out[7] ^= 0x55 & ~(out[7] >> 1);
839                                 }
840                                 else
841                                 {
842                                         out[12] ^= 0x55 & ~(out[12] >> 1);
843                                         out[13] ^= 0x55 & ~(out[13] >> 1);
844                                         out[14] ^= 0x55 & ~(out[14] >> 1);
845                                         out[15] ^= 0x55 & ~(out[15] >> 1);
846                                 }
847                         }
848                 }
849
850                 switch(dxt)
851                 {
852                         case DXT5:
853                                 out[0] = ca[0];
854                                 out[1] = ca[1];
855                         case DXT3:
856                                 out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
857                                 out[9] = (c[0].r << 3) | (c[0].g >> 3);
858                                 out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
859                                 out[11] = (c[1].r << 3) | (c[1].g >> 3);
860                                 break;
861                         case DXT1:
862                                 out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
863                                 out[1] = (c[0].r << 3) | (c[0].g >> 3);
864                                 out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
865                                 out[3] = (c[1].r << 3) | (c[1].g >> 3);
866                                 break;
867                 }
868         }
869
870         // these color dist functions do not need the refinement check, as they always improve the situation
871         template<ColorDistFunc ColorDist> struct need_refine_check
872         {
873                 static const bool value = true;
874         };
875         template<> struct need_refine_check<color_dist_avg>
876         {
877                 static const bool value = false;
878         };
879         template<> struct need_refine_check<color_dist_wavg>
880         {
881                 static const bool value = false;
882         };
883
884         // compile time dispatch magic
885         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
886         inline s2tc_encode_block_func_t s2tc_encode_block_func(RefinementMode refine)
887         {
888                 switch(refine)
889                 {
890                         case REFINE_NEVER:
891                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_NEVER>;
892                         case REFINE_LOOP:
893                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_LOOP>;
894                         case REFINE_CHECK:
895                                 if(need_refine_check<ColorDist>::value)
896                                         return s2tc_encode_block<dxt, ColorDist, mode, REFINE_CHECK>;
897                         default:
898                         case REFINE_ALWAYS:
899                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_ALWAYS>;
900                 }
901         }
902
903         // these color dist functions do not need the refinement check, as they always improve the situation
904         template<ColorDistFunc ColorDist> struct supports_fast
905         {
906                 static const bool value = true;
907         };
908         template<> struct need_refine_check<color_dist_normalmap>
909         {
910                 static const bool value = false;
911         };
912
913         template<DxtMode dxt, ColorDistFunc ColorDist>
914         inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, RefinementMode refine)
915         {
916                 if(nrandom > 0)
917                         return s2tc_encode_block_func<dxt, ColorDist, MODE_RANDOM>(refine);
918                 else if(!supports_fast<ColorDist>::value || nrandom == 0) // MODE_FAST not supported for normalmaps, sorry
919                         return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
920                 else
921                         return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
922         }
923
924         template<ColorDistFunc ColorDist>
925         inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, RefinementMode refine)
926         {
927                 switch(dxt)
928                 {
929                         case DXT1:
930                                 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
931                                 break;
932                         case DXT3:
933                                 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
934                                 break;
935                         default:
936                         case DXT5:
937                                 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
938                                 break;
939                 }
940         }
941 };
942
943 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, RefinementMode refine)
944 {
945         switch(cd)
946         {
947                 case RGB:
948                         return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
949                         break;
950                 case YUV:
951                         return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
952                         break;
953                 case SRGB:
954                         return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
955                         break;
956                 case SRGB_MIXED:
957                         return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
958                         break;
959                 case LAB:
960                         return s2tc_encode_block_func<color_dist_lab_srgb>(dxt, nrandom, refine);
961                         break;
962                 case AVG:
963                         return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
964                         break;
965                 default:
966                 case WAVG:
967                         return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
968                         break;
969                 case NORMALMAP:
970                         return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
971                         break;
972         }
973 }
974
namespace
{
        // Quantize one 8-bit sample to (8 - shift) bits with error diffusion.
        //
        // diff:  in/out running error. It is added to src before quantizing
        //        and then replaced by the new residual, i.e. the corrected
        //        sample minus the value the quantized result expands back to.
        // src:   source sample
        // shift: number of low bits to drop
        // Returns the quantized value, clamped to 0 .. (1 << (8 - shift)) - 1.
        //
        // The expansion replicates the quantized bit pattern until 8 bits are
        // filled. For shift 0..4 this equals
        // (ret << shift) | (ret >> (8 - 2 * shift)); for larger shifts that
        // expression would use a negative shift count, so the replication is
        // done in a loop that stays well-defined for every width.
        inline int diffuse(int *diff, int src, int shift)
        {
                const int bits = 8 - shift;
                src += *diff;

                if(bits <= 0)
                {
                        // no bits are kept: the only level is 0, so the whole sample is error
                        *diff = src;
                        return 0;
                }

                const int maxval = (1 << bits) - 1;
                int ret = 0;
                if(src > 0) // clamp before shifting so that no negative value is shifted
                {
                        ret = src >> shift;
                        if(ret > maxval)
                                ret = maxval;
                }

                // simulate decoding ("loop filter")
                int loop = 0;
                for(int pos = shift; pos > -bits; pos -= bits)
                        loop |= (pos >= 0) ? (ret << pos) : (ret >> -pos);

                *diff = src - loop;
                return ret;
        }

        // 1-bit variant of diffuse(): thresholds the error-corrected sample at
        // 128, treats the result as 0 or 255 when computing the new residual,
        // and returns 0 or 1.
        inline int diffuse1(int *diff, int src)
        {
                src += *diff;
                const int ret = (src >= 128);
                *diff = src - (ret ? 255 : 0);
                return ret;
        }
}
996
997 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int bgr, int alphabits)
998 {
999         int x, y;
1000         int diffuse_r = 0;
1001         int diffuse_g = 0;
1002         int diffuse_b = 0;
1003         int diffuse_a = 0;
1004         if(bgr)
1005         {
1006                 for(y = 0; y < h; ++y)
1007                         for(x = 0; x < w; ++x)
1008                         {
1009                                 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 2], 3);
1010                                 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1011                                 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 0], 3);
1012                         }
1013         }
1014         else
1015         {
1016                 for(y = 0; y < h; ++y)
1017                         for(x = 0; x < w; ++x)
1018                         {
1019                                 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
1020                                 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1021                                 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
1022                         }
1023         }
1024         if(srccomps == 4)
1025         {
1026                 if(alphabits == 1)
1027                 {
1028                         for(y = 0; y < h; ++y)
1029                                 for(x = 0; x < w; ++x)
1030                                         out[(x + y * w) * 4 + 3] = diffuse1(&diffuse_a, rgba[(x + y * w) * srccomps + 3]);
1031                 }
1032                 else if(alphabits == 8)
1033                 {
1034                         for(y = 0; y < h; ++y)
1035                                 for(x = 0; x < w; ++x)
1036                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1037                 }
1038                 else
1039                 {
1040                         int alphadiffuse = 8 - alphabits;
1041                         for(y = 0; y < h; ++y)
1042                                 for(x = 0; x < w; ++x)
1043                                         out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], alphadiffuse);
1044                 }
1045         }
1046         else
1047         {
1048                 int alpharange = (1 << alphabits) - 1;
1049                 for(y = 0; y < h; ++y)
1050                         for(x = 0; x < w; ++x)
1051                                 out[(x + y * w) * 4 + 3] = alpharange;
1052         }
1053 }
1054