OSDN Git Service

remove "int bgr" argument from 565 color reduction
[android-x86/external-s2tc.git] / s2tc_algorithm.cpp
1 /*
2  * Copyright (C) 2011  Rudolf Polzer   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * RUDOLF POLZER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20  */
21 #define S2TC_LICENSE_IDENTIFIER s2tc_algorithm_license
22 #include "s2tc_license.h"
23
24 #include <math.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <stdio.h>
28
29 #include "s2tc_algorithm.h"
30 #include "s2tc_common.h"
31
32 namespace
33 {
34         typedef struct
35         {
36                 signed char r, g, b;
37         }
38         color_t;
39
40         inline bool operator<(const color_t &a, const color_t &b)
41         {
42                 signed char d;
43                 d = a.r - b.r;
44                 if(d)
45                         return d < 0;
46                 d = a.g - b.g;
47                 if(d)
48                         return d < 0;
49                 d = a.b - b.b;
50                 return d < 0;
51         }
52         // 16 differences must fit in int
53         // i.e. a difference must be lower than 2^27
54
55         // shift right, rounded
56 #define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
57
58         inline int color_dist_avg(const color_t &a, const color_t &b)
59         {
60                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
61                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
62                 int db = a.b - b.b; // multiplier: 31 (-1..1)
63                 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
64         }
65
66         inline int color_dist_wavg(const color_t &a, const color_t &b)
67         {
68                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
69                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
70                 int db = a.b - b.b; // multiplier: 31 (-1..1)
71                 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
72                 // weighted 4:16:1
73         }
74
75         inline int color_dist_yuv(const color_t &a, const color_t &b)
76         {
77                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
78                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
79                 int db = a.b - b.b; // multiplier: 31 (-1..1)
80                 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
81                 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
82                 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
83                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
84                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
85                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
86         }
87
88         inline int color_dist_rgb(const color_t &a, const color_t &b)
89         {
90                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
91                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
92                 int db = a.b - b.b; // multiplier: 31 (-1..1)
93                 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
94                 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
95                 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
96                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
97                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
98                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
99         }
100
101         inline int color_dist_srgb(const color_t &a, const color_t &b)
102         {
103                 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
104                 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
105                 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
106                 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
107                 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
108                 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
109                 int sy = SHRR(y, 3) * SHRR(y, 4);
110                 int su = SHRR(u, 3) * SHRR(u, 4);
111                 int sv = SHRR(v, 3) * SHRR(v, 4);
112                 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
113                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
114                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
115         }
116
117         inline int srgb_get_y(const color_t &a)
118         {
119                 // convert to linear
120                 int r = a.r * (int) a.r;
121                 int g = a.g * (int) a.g;
122                 int b = a.b * (int) a.b;
123                 // find luminance
124                 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
125                 // square root it (!)
126                 y = sqrtf(y) + 0.5f; // now in range 0 to 3815
127                 return y;
128         }
129
130         inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
131         {
132                 // get Y
133                 int ay = srgb_get_y(a);
134                 int by = srgb_get_y(b);
135                 // get UV
136                 int au = a.r * 191 - ay;
137                 int av = a.b * 191 - ay;
138                 int bu = b.r * 191 - by;
139                 int bv = b.b * 191 - by;
140                 // get differences
141                 int y = ay - by;
142                 int u = au - bu;
143                 int v = av - bv;
144                 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
145                 // weight for u: ???
146                 // weight for v: ???
147         }
148
149         inline int color_dist_normalmap(const color_t &a, const color_t &b)
150         {
151                 float ca[3], cb[3], n;
152                 ca[0] = a.r / 31.0f * 2 - 1;
153                 ca[1] = a.g / 63.0f * 2 - 1;
154                 ca[2] = a.b / 31.0f * 2 - 1;
155                 cb[0] = b.r / 31.0f * 2 - 1;
156                 cb[1] = b.g / 63.0f * 2 - 1;
157                 cb[2] = b.b / 31.0f * 2 - 1;
158                 n = ca[0] * ca[0] + ca[1] * ca[1] + ca[2] * ca[2];
159                 if(n > 0)
160                 {
161                         n = 1.0f / sqrtf(n);
162                         ca[0] *= n;
163                         ca[1] *= n;
164                         ca[2] *= n;
165                 }
166                 n = cb[0] * cb[0] + cb[1] * cb[1] + cb[2] * cb[2];
167                 if(n > 0)
168                 {
169                         n = 1.0f / sqrtf(n);
170                         cb[0] *= n;
171                         cb[1] *= n;
172                         cb[2] *= n;
173                 }
174
175                 return
176                         100000 *
177                         (
178                                 (cb[0] - ca[0]) * (cb[0] - ca[0])
179                                 +
180                                 (cb[1] - ca[1]) * (cb[1] - ca[1])
181                                 +
182                                 (cb[2] - ca[2]) * (cb[2] - ca[2])
183                         )
184                         ;
185                 // max value: 1000 * (4 + 4 + 4) = 6000
186         }
187
188         typedef int ColorDistFunc(const color_t &a, const color_t &b);
189
190         inline int alpha_dist(unsigned char a, unsigned char b)
191         {
192                 return (a - (int) b) * (a - (int) b);
193         }
194
195         template <class T, class F>
196         // n: input count
197         // m: total color count (including non-counted inputs)
198         // m >= n
199         inline void reduce_colors_inplace(T *c, int n, int m, F dist)
200         {
201                 int i, j, k;
202                 int bestsum = -1;
203                 int besti = 0;
204                 int bestj = 1;
205                 int dists[m][n];
206                 // first the square
207                 for(i = 0; i < n; ++i)
208                 {
209                         dists[i][i] = 0;
210                         for(j = i+1; j < n; ++j)
211                         {
212                                 int d = dist(c[i], c[j]);
213                                 dists[i][j] = dists[j][i] = d;
214                         }
215                 }
216                 // then the box
217                 for(; i < m; ++i)
218                 {
219                         for(j = 0; j < n; ++j)
220                         {
221                                 int d = dist(c[i], c[j]);
222                                 dists[i][j] = d;
223                         }
224                 }
225                 for(i = 0; i < m; ++i)
226                         for(j = i+1; j < m; ++j)
227                         {
228                                 int sum = 0;
229                                 for(k = 0; k < n; ++k)
230                                 {
231                                         int di = dists[i][k];
232                                         int dj = dists[j][k];
233                                         int m  = min(di, dj);
234                                         sum += m;
235                                 }
236                                 if(bestsum < 0 || sum < bestsum)
237                                 {
238                                         bestsum = sum;
239                                         besti = i;
240                                         bestj = j;
241                                 }
242                         }
243                 if(besti != 0)
244                         c[0] = c[besti];
245                 if(bestj != 1)
246                         c[1] = c[bestj];
247         }
248         template <class T, class F>
249         inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
250         {
251                 int i, j, k;
252                 int bestsum = -1;
253                 int besti = 0;
254                 int bestj = 1;
255                 int dists[m+2][n];
256                 // first the square
257                 for(i = 0; i < n; ++i)
258                 {
259                         dists[i][i] = 0;
260                         for(j = i+1; j < n; ++j)
261                         {
262                                 int d = dist(c[i], c[j]);
263                                 dists[i][j] = dists[j][i] = d;
264                         }
265                 }
266                 // then the box
267                 for(; i < m; ++i)
268                 {
269                         for(j = 0; j < n; ++j)
270                         {
271                                 int d = dist(c[i], c[j]);
272                                 dists[i][j] = d;
273                         }
274                 }
275                 // then the two extra rows
276                 for(j = 0; j < n; ++j)
277                 {
278                         int d = dist(fix0, c[j]);
279                         dists[m][j] = d;
280                 }
281                 for(j = 0; j < n; ++j)
282                 {
283                         int d = dist(fix1, c[j]);
284                         dists[m+1][j] = d;
285                 }
286                 for(i = 0; i < m; ++i)
287                         for(j = i+1; j < m; ++j)
288                         {
289                                 int sum = 0;
290                                 for(k = 0; k < n; ++k)
291                                 {
292                                         int di = dists[i][k];
293                                         int dj = dists[j][k];
294                                         int d0 = dists[m][k];
295                                         int d1 = dists[m+1][k];
296                                         int m  = min(min(di, dj), min(d0, d1));
297                                         sum += m;
298                                 }
299                                 if(bestsum < 0 || sum < bestsum)
300                                 {
301                                         bestsum = sum;
302                                         besti = i;
303                                         bestj = j;
304                                 }
305                         }
306                 if(besti != 0)
307                         c[0] = c[besti];
308                 if(bestj != 1)
309                         c[1] = c[bestj];
310         }
311
312         enum CompressionMode
313         {
314                 MODE_NORMAL,
315                 MODE_RANDOM,
316                 MODE_FAST
317         };
318
319         template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
320         {
321                 return comp;
322         }
323         template<> inline int refine_component_encode<color_dist_srgb>(int comp)
324         {
325                 return comp * comp;
326         }
327         template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
328         {
329                 return comp * comp;
330         }
331
332         template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
333         {
334                 return comp;
335         }
336         template<> inline int refine_component_decode<color_dist_srgb>(int comp)
337         {
338                 return sqrtf(comp) + 0.5f;
339         }
340         template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
341         {
342                 return sqrtf(comp) + 0.5f;
343         }
344
345         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
346         inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
347         {
348                 color_t c[16 + (mode == MODE_RANDOM ? nrandom : 0)];
349                 unsigned char ca[16 + (mode == MODE_RANDOM ? nrandom : 0)];
350                 int n = 0, m = 0;
351                 int x, y;
352
353                 if(mode == MODE_FAST)
354                 {
355                         // FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these
356
357                         color_t c0 = {0, 0, 0};
358
359                         // dummy values because we don't know whether the first pixel willw rite
360                         c[0].r = 31;
361                         c[0].g = 63;
362                         c[0].b = 31;
363                         c[1].r = 0;
364                         c[1].g = 0;
365                         c[1].b = 0;
366                         int dmin = 0x7FFFFFFF;
367                         int dmax = 0;
368                         if(dxt == DXT5)
369                         {
370                                 ca[0] = rgba[3];
371                                 ca[1] = ca[0];
372                         }
373
374                         for(x = 0; x < w; ++x)
375                                 for(y = 0; y < h; ++y)
376                                 {
377                                         c[2].r = rgba[(x + y * iw) * 4 + 2];
378                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
379                                         c[2].b = rgba[(x + y * iw) * 4 + 0];
380                                         ca[2]  = rgba[(x + y * iw) * 4 + 3];
381                                         // MODE_FAST doesn't work for normalmaps, so this works
382                                         if(!ca[2])
383                                                 continue;
384
385                                         int d = ColorDist(c[2], c0);
386                                         if(d > dmax)
387                                         {
388                                                 dmax = d;
389                                                 c[1] = c[2];
390                                         }
391                                         if(d < dmin)
392                                         {
393                                                 dmin = d;
394                                                 c[0] = c[2];
395                                         }
396
397                                         if(dxt == DXT5)
398                                         {
399                                                 if(ca[2] != 255)
400                                                 {
401                                                         if(ca[2] > ca[1])
402                                                                 ca[1] = ca[2];
403                                                         if(ca[2] < ca[0])
404                                                                 ca[0] = ca[2];
405                                                 }
406                                         }
407                                 }
408
409                         // if ALL pixels were transparent, this won't stop us
410
411                         m = n = 2;
412                 }
413                 else
414                 {
415                         for(x = 0; x < w; ++x)
416                                 for(y = 0; y < h; ++y)
417                                 {
418                                         ca[n]  = rgba[(x + y * iw) * 4 + 3];
419                                         c[n].r = rgba[(x + y * iw) * 4 + 2];
420                                         c[n].g = rgba[(x + y * iw) * 4 + 1];
421                                         c[n].b = rgba[(x + y * iw) * 4 + 0];
422                                         ++n;
423                                 }
424                         if(n == 0)
425                         {
426                                 n = 1;
427                                 c[0].r = 0;
428                                 c[0].g = 0;
429                                 c[0].b = 0;
430                                 ca[0] = 0;
431                         }
432                         m = n;
433
434                         if(mode == MODE_RANDOM)
435                         {
436                                 color_t mins = c[0];
437                                 color_t maxs = c[0];
438                                 unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
439                                 unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
440                                 for(x = 1; x < n; ++x)
441                                 {
442                                         mins.r = min(mins.r, c[x].r);
443                                         mins.g = min(mins.g, c[x].g);
444                                         mins.b = min(mins.b, c[x].b);
445                                         maxs.r = max(maxs.r, c[x].r);
446                                         maxs.g = max(maxs.g, c[x].g);
447                                         maxs.b = max(maxs.b, c[x].b);
448                                         if(dxt == DXT5)
449                                         {
450                                                 mina = min(mina, ca[x]);
451                                                 maxa = max(maxa, ca[x]);
452                                         }
453                                 }
454                                 color_t len = { maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1 };
455                                 int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
456                                 for(x = 0; x < nrandom; ++x)
457                                 {
458                                         c[m].r = mins.r + rand() % len.r;
459                                         c[m].g = mins.g + rand() % len.g;
460                                         c[m].b = mins.b + rand() % len.b;
461                                         if(dxt == DXT5)
462                                                 ca[m] = mina + rand() % lena;
463                                         ++m;
464                                 }
465                         }
466                         else
467                         {
468                                 // hack for last miplevel
469                                 if(n == 1)
470                                 {
471                                         c[1] = c[0];
472                                         m = n = 2;
473                                 }
474                         }
475
476                         reduce_colors_inplace(c, n, m, ColorDist);
477                         if(dxt == DXT5)
478                                 reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
479                 }
480
481                 if(refine == REFINE_NEVER)
482                 {
483                         if(dxt == DXT5)
484                         {
485                                 if(ca[1] < ca[0])
486                                 {
487                                         // select mode with 6 = 0, 7 = 255
488                                         ca[2] = ca[0];
489                                         ca[0] = ca[1];
490                                         ca[1] = ca[2];
491                                 }
492                         }
493                         if((dxt == DXT1) ? (c[1] < c[0]) : (c[0] < c[1]))
494                         // DXT1: select mode with 3 = transparent
495                         // other: don't select this mode
496                         {
497                                 c[2] = c[0];
498                                 c[0] = c[1];
499                                 c[1] = c[2];
500                         }
501                 }
502
503                 bool refined;
504                 do
505                 {
506                         int nc0 = 0, na0 = 0, sc0r = 0, sc0g = 0, sc0b = 0, sa0 = 0;
507                         int nc1 = 0, na1 = 0, sc1r = 0, sc1g = 0, sc1b = 0, sa1 = 0;
508                         if(refine == REFINE_LOOP)
509                                 refined = false;
510
511                         memset(out, 0, (dxt == DXT1) ? 8 : 16);
512                         for(x = 0; x < w; ++x)
513                                 for(y = 0; y < h; ++y)
514                                 {
515                                         int pindex = (x+y*4);
516                                         c[2].r = rgba[(x + y * iw) * 4 + 2];
517                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
518                                         c[2].b = rgba[(x + y * iw) * 4 + 0];
519                                         ca[2]  = rgba[(x + y * iw) * 4 + 3];
520                                         switch(dxt)
521                                         {
522                                                 case DXT5:
523                                                         {
524                                                                 int da[4];
525                                                                 int bitindex = pindex * 3;
526                                                                 da[0] = alpha_dist(ca[0], ca[2]);
527                                                                 da[1] = alpha_dist(ca[1], ca[2]);
528                                                                 da[2] = alpha_dist(0, ca[2]);
529                                                                 da[3] = alpha_dist(255, ca[2]);
530                                                                 if(da[2] <= da[0] && da[2] <= da[1] && da[2] <= da[3])
531                                                                 {
532                                                                         // 6
533                                                                         ++bitindex;
534                                                                         setbit(&out[2], bitindex);
535                                                                         ++bitindex;
536                                                                         setbit(&out[2], bitindex);
537                                                                 }
538                                                                 else if(da[3] <= da[0] && da[3] <= da[1])
539                                                                 {
540                                                                         // 7
541                                                                         setbit(&out[2], bitindex);
542                                                                         ++bitindex;
543                                                                         setbit(&out[2], bitindex);
544                                                                         ++bitindex;
545                                                                         setbit(&out[2], bitindex);
546                                                                 }
547                                                                 else if(da[0] <= da[1])
548                                                                 {
549                                                                         // 0
550                                                                         if(refine != REFINE_NEVER)
551                                                                         {
552                                                                                 ++na0;
553                                                                                 sa0 += ca[2];
554                                                                         }
555                                                                 }
556                                                                 else
557                                                                 {
558                                                                         // 1
559                                                                         setbit(&out[2], bitindex);
560                                                                         if(refine != REFINE_NEVER)
561                                                                         {
562                                                                                 ++na1;
563                                                                                 sa1 += ca[2];
564                                                                         }
565                                                                 }
566                                                                 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
567                                                                 {
568                                                                         int bitindex = pindex * 2;
569                                                                         setbit(&out[12], bitindex);
570                                                                         if(refine != REFINE_NEVER)
571                                                                         {
572                                                                                 ++nc1;
573                                                                                 sc1r += refine_component_encode<ColorDist>(c[2].r);
574                                                                                 sc1g += refine_component_encode<ColorDist>(c[2].g);
575                                                                                 sc1b += refine_component_encode<ColorDist>(c[2].b);
576                                                                         }
577                                                                 }
578                                                                 else
579                                                                 {
580                                                                         if(refine != REFINE_NEVER)
581                                                                         {
582                                                                                 ++nc0;
583                                                                                 sc0r += refine_component_encode<ColorDist>(c[2].r);
584                                                                                 sc0g += refine_component_encode<ColorDist>(c[2].g);
585                                                                                 sc0b += refine_component_encode<ColorDist>(c[2].b);
586                                                                         }
587                                                                 }
588                                                         }
589                                                         break;
590                                                 case DXT3:
591                                                         {
592                                                                 int bitindex = pindex * 4;
593                                                                 setbit(&out[0], bitindex, ca[2]);
594                                                         }
595                                                         if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
596                                                         {
597                                                                 int bitindex = pindex * 2;
598                                                                 setbit(&out[12], bitindex);
599                                                                 if(refine != REFINE_NEVER)
600                                                                 {
601                                                                         ++nc1;
602                                                                         sc1r += refine_component_encode<ColorDist>(c[2].r);
603                                                                         sc1g += refine_component_encode<ColorDist>(c[2].g);
604                                                                         sc1b += refine_component_encode<ColorDist>(c[2].b);
605                                                                 }
606                                                         }
607                                                         else
608                                                         {
609                                                                 if(refine != REFINE_NEVER)
610                                                                 {
611                                                                         ++nc0;
612                                                                         sc0r += refine_component_encode<ColorDist>(c[2].r);
613                                                                         sc0g += refine_component_encode<ColorDist>(c[2].g);
614                                                                         sc0b += refine_component_encode<ColorDist>(c[2].b);
615                                                                 }
616                                                         }
617                                                         break;
618                                                 case DXT1:
619                                                         {
620                                                                 // the normalmap-uses-alpha-0 hack cannot be used here
621                                                                 int bitindex = pindex * 2;
622                                                                 if(!ca[2])
623                                                                         setbit(&out[4], bitindex, 3);
624                                                                 else if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
625                                                                 {
626                                                                         setbit(&out[4], bitindex);
627                                                                         if(refine != REFINE_NEVER)
628                                                                         {
629                                                                                 ++nc1;
630                                                                                 sc1r += refine_component_encode<ColorDist>(c[2].r);
631                                                                                 sc1g += refine_component_encode<ColorDist>(c[2].g);
632                                                                                 sc1b += refine_component_encode<ColorDist>(c[2].b);
633                                                                         }
634                                                                 }
635                                                                 else
636                                                                 {
637                                                                         if(refine != REFINE_NEVER)
638                                                                         {
639                                                                                 ++nc0;
640                                                                                 sc0r += refine_component_encode<ColorDist>(c[2].r);
641                                                                                 sc0g += refine_component_encode<ColorDist>(c[2].g);
642                                                                                 sc0b += refine_component_encode<ColorDist>(c[2].b);
643                                                                         }
644                                                                 }
645                                                         }
646                                                         break;
647                                         }
648                                 }
649                         if(refine != REFINE_NEVER)
650                         {
651                                 // REFINEMENT: trick from libtxc_dxtn: reassign the colors to an average of the colors encoded with that value
652
653                                 if(dxt == DXT5)
654                                 {
655                                         if(na0)
656                                                 ca[0] = (2 * sa0 + na0) / (2 * na0);
657                                         if(na1)
658                                                 ca[1] = (2 * sa1 + na1) / (2 * na1);
659                                 }
660                                 if(refine == REFINE_CHECK || refine == REFINE_LOOP)
661                                 {
662                                         c[2] = c[0];
663                                         c[3] = c[1];
664                                 }
665                                 if(nc0)
666                                 {
667                                         c[0].r = refine_component_decode<ColorDist>((2 * sc0r + nc0) / (2 * nc0));
668                                         c[0].g = refine_component_decode<ColorDist>((2 * sc0g + nc0) / (2 * nc0));
669                                         c[0].b = refine_component_decode<ColorDist>((2 * sc0b + nc0) / (2 * nc0));
670                                 }
671                                 if(nc1)
672                                 {
673                                         c[1].r = refine_component_decode<ColorDist>((2 * sc1r + nc1) / (2 * nc1));
674                                         c[1].g = refine_component_decode<ColorDist>((2 * sc1g + nc1) / (2 * nc1));
675                                         c[1].b = refine_component_decode<ColorDist>((2 * sc1b + nc1) / (2 * nc1));
676                                 }
677
678                                 if(refine == REFINE_CHECK || refine == REFINE_LOOP)
679                                 {
680                                         int score_01 = 0;
681                                         int score_23 = 0;
682                                         for(x = 0; x < w; ++x)
683                                                 for(y = 0; y < h; ++y)
684                                                 {
685                                                         int pindex = (x+y*4);
686                                                         c[4].r = rgba[(x + y * iw) * 4 + 2];
687                                                         c[4].g = rgba[(x + y * iw) * 4 + 1];
688                                                         c[4].b = rgba[(x + y * iw) * 4 + 0];
689                                                         if(dxt == DXT1) // in DXT1, alpha 0 pixels are always skipped!
690                                                         {
691                                                                 // check ORIGINAL alpha (DXT1 and DXT3 preserve it)
692                                                                 ca[4] = rgba[(x + y * iw) * 4 + 3];
693                                                                 if(!rgba[(x + y * iw) * 4 + 3])
694                                                                         continue;
695                                                         }
696                                                         int bitindex = pindex * 2;
697                                                         if(refine == REFINE_CHECK)
698                                                         {
699                                                                 if(testbit(&out[(dxt == DXT1 ? 4 : 12)], bitindex))
700                                                                 {
701                                                                         // we picked an 1
702                                                                         score_01 += ColorDist(c[1], c[4]);
703                                                                         score_23 += ColorDist(c[3], c[4]);
704                                                                 }
705                                                                 else
706                                                                 {
707                                                                         // we picked a 0
708                                                                         score_01 += ColorDist(c[0], c[4]);
709                                                                         score_23 += ColorDist(c[2], c[4]);
710                                                                 }
711                                                         }
712                                                         else if(refine == REFINE_LOOP)
713                                                         {
714                                                                 if(testbit(&out[(dxt == DXT1 ? 4 : 12)], bitindex))
715                                                                 {
716                                                                         // we picked an 1
717                                                                         score_23 += ColorDist(c[3], c[4]);
718                                                                 }
719                                                                 else
720                                                                 {
721                                                                         // we picked a 0
722                                                                         score_23 += ColorDist(c[2], c[4]);
723                                                                 }
724                                                                 // we WILL run another loop iteration, if score_01 wins
725                                                                 score_01 += min(ColorDist(c[0], c[4]), ColorDist(c[1], c[4]));
726                                                         }
727                                                 }
728
729                                         if(score_23 <= score_01)
730                                         {
731                                                 // refinement was BAD
732                                                 c[0] = c[2];
733                                                 c[1] = c[3];
734                                         }
735                                         else if(refine == REFINE_LOOP)
736                                                 refined = true;
737
738                                         // alpha refinement is always good and doesn't
739                                         // need to be checked because alpha is linear
740
741                                         // when looping, though, checking the
742                                         // alpha COULD help, but we usually
743                                         // loop twice anyway as refinement
744                                         // usually helps
745                                 }
746                         }
747                 }
748                 while(refine == REFINE_LOOP && refined);
749
750                 if(refine != REFINE_NEVER)
751                 {
752                         if(dxt == DXT5)
753                         {
754                                 if(ca[1] < ca[0])
755                                 {
756                                         ca[2] = ca[0];
757                                         ca[0] = ca[1];
758                                         ca[1] = ca[2];
759                                         // swap the alphas
760                                         for(int pindex = 0; pindex < 16; ++pindex)
761                                         {
762                                                 int bitindex_set = pindex * 3;
763                                                 int bitindex_test = bitindex_set + 2;
764                                                 if(!testbit(&out[2], bitindex_test))
765                                                         xorbit(&out[2], bitindex_set);
766                                         }
767                                 }
768                         }
769                         if((dxt == DXT1) ? (c[1] < c[0]) : (c[0] < c[1]))
770                         // DXT1: select mode with 3 = transparent
771                         // other: don't select this mode
772                         {
773                                 c[2] = c[0];
774                                 c[0] = c[1];
775                                 c[1] = c[2];
776                                 // swap the colors
777                                 if(dxt == DXT1)
778                                 {
779                                         out[4] ^= 0x55 & ~(out[4] >> 1);
780                                         out[5] ^= 0x55 & ~(out[5] >> 1);
781                                         out[6] ^= 0x55 & ~(out[6] >> 1);
782                                         out[7] ^= 0x55 & ~(out[7] >> 1);
783                                 }
784                                 else
785                                 {
786                                         out[12] ^= 0x55 & ~(out[12] >> 1);
787                                         out[13] ^= 0x55 & ~(out[13] >> 1);
788                                         out[14] ^= 0x55 & ~(out[14] >> 1);
789                                         out[15] ^= 0x55 & ~(out[15] >> 1);
790                                 }
791                         }
792                 }
793
794                 switch(dxt)
795                 {
796                         case DXT5:
797                                 out[0] = ca[0];
798                                 out[1] = ca[1];
799                         case DXT3:
800                                 out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
801                                 out[9] = (c[0].r << 3) | (c[0].g >> 3);
802                                 out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
803                                 out[11] = (c[1].r << 3) | (c[1].g >> 3);
804                                 break;
805                         case DXT1:
806                                 out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
807                                 out[1] = (c[0].r << 3) | (c[0].g >> 3);
808                                 out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
809                                 out[3] = (c[1].r << 3) | (c[1].g >> 3);
810                                 break;
811                 }
812         }
813
814         // these color dist functions do not need the refinement check, as they always improve the situation
815         template<ColorDistFunc ColorDist> struct need_refine_check
816         {
817                 static const bool value = true;
818         };
819         template<> struct need_refine_check<color_dist_avg>
820         {
821                 static const bool value = false;
822         };
823         template<> struct need_refine_check<color_dist_wavg>
824         {
825                 static const bool value = false;
826         };
827
828         // compile time dispatch magic
829         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
830         inline s2tc_encode_block_func_t s2tc_encode_block_func(RefinementMode refine)
831         {
832                 switch(refine)
833                 {
834                         case REFINE_NEVER:
835                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_NEVER>;
836                         case REFINE_LOOP:
837                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_LOOP>;
838                         case REFINE_CHECK:
839                                 if(need_refine_check<ColorDist>::value)
840                                         return s2tc_encode_block<dxt, ColorDist, mode, REFINE_CHECK>;
841                         default:
842                         case REFINE_ALWAYS:
843                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_ALWAYS>;
844                 }
845         }
846
847         // these color dist functions do not need the refinement check, as they always improve the situation
848         template<ColorDistFunc ColorDist> struct supports_fast
849         {
850                 static const bool value = true;
851         };
852         template<> struct need_refine_check<color_dist_normalmap>
853         {
854                 static const bool value = false;
855         };
856
857         template<DxtMode dxt, ColorDistFunc ColorDist>
858         inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, RefinementMode refine)
859         {
860                 if(nrandom > 0)
861                         return s2tc_encode_block_func<dxt, ColorDist, MODE_RANDOM>(refine);
862                 else if(!supports_fast<ColorDist>::value || nrandom == 0) // MODE_FAST not supported for normalmaps, sorry
863                         return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
864                 else
865                         return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
866         }
867
868         template<ColorDistFunc ColorDist>
869         inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, RefinementMode refine)
870         {
871                 switch(dxt)
872                 {
873                         case DXT1:
874                                 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
875                                 break;
876                         case DXT3:
877                                 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
878                                 break;
879                         default:
880                         case DXT5:
881                                 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
882                                 break;
883                 }
884         }
885 };
886
887 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, RefinementMode refine)
888 {
889         switch(cd)
890         {
891                 case RGB:
892                         return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
893                         break;
894                 case YUV:
895                         return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
896                         break;
897                 case SRGB:
898                         return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
899                         break;
900                 case SRGB_MIXED:
901                         return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
902                         break;
903                 case AVG:
904                         return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
905                         break;
906                 default:
907                 case WAVG:
908                         return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
909                         break;
910                 case NORMALMAP:
911                         return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
912                         break;
913         }
914 }
915
916 namespace
917 {
918         inline int diffuse(int *diff, int src, int shift)
919         {
920                 const int maxval = (1 << (8 - shift)) - 1;
921                 src += *diff;
922                 int ret = max(0, min(src >> shift, maxval));
923                 // simulate decoding ("loop filter")
924                 int loop = (ret << shift) | (ret >> (8 - 2 * shift));
925                 *diff = src - loop;
926                 return ret;
927         }
928         inline int diffuse1(int *diff, int src)
929         {
930                 src += *diff;
931                 int ret = (src >= 128);
932                 // simulate decoding ("loop filter")
933                 int loop = ret ? 255 : 0;
934                 *diff = src - loop;
935                 return ret;
936         }
937
938         inline int floyd(int *thisrow, int *downrow, int src, int shift)
939         {
940                 const int maxval = (1 << (8 - shift)) - 1;
941                 src = (src << 4) | (src >> 4);
942                 src += thisrow[1];
943                 int ret = max(0, min(src >> (shift + 4), maxval));
944                 // simulate decoding ("loop filter")
945                 int loop = (ret * 4095 / maxval);
946                 int err = src - loop;
947                 int e7 = (err * 7 + 8) / 16;
948                 err -= e7;
949                 int e3 = (err * 3 + 4) / 9;
950                 err -= e3;
951                 int e5 = (err * 5 + 3) / 6;
952                 err -= e5;
953                 int e1 = err;
954                 thisrow[2] += e7;
955                 downrow[0] += e3;
956                 downrow[1] += e5;
957                 downrow[2] += e1;
958                 return ret;
959         }
960
961         inline int floyd1(int *thisrow, int *downrow, int src)
962         {
963                 src = (src << 4) | (src >> 4);
964                 src += thisrow[1];
965                 int ret = (src >= 2048);
966                 // simulate decoding ("loop filter")
967                 int loop = ret ? 4095 : 0;
968                 int err = src - loop;
969                 int e7 = (err * 7 + 8) / 16;
970                 err -= e7;
971                 int e3 = (err * 3 + 4) / 9;
972                 err -= e3;
973                 int e5 = (err * 5 + 3) / 6;
974                 err -= e5;
975                 int e1 = err;
976                 thisrow[2] += e7;
977                 downrow[0] += e3;
978                 downrow[1] += e5;
979                 downrow[2] += e1;
980                 return ret;
981         }
982
983         template<int srccomps, int alphabits, DitherMode dither>
984         inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h)
985         {
986                 int x, y;
987                 switch(dither)
988                 {
989                         case DITHER_NONE:
990                                 {
991                                         for(y = 0; y < h; ++y)
992                                                 for(x = 0; x < w; ++x)
993                                                 {
994                                                         out[(x + y * w) * 4 + 2] = rgba[(x + y * w) * srccomps + 0] >> 3;
995                                                         out[(x + y * w) * 4 + 1] = rgba[(x + y * w) * srccomps + 1] >> 2;
996                                                         out[(x + y * w) * 4 + 0] = rgba[(x + y * w) * srccomps + 2] >> 3;
997                                                 }
998                                         if(srccomps == 4)
999                                         {
1000                                                 if(alphabits == 1)
1001                                                 {
1002                                                         for(y = 0; y < h; ++y)
1003                                                                 for(x = 0; x < w; ++x)
1004                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> 7;
1005                                                 }
1006                                                 else if(alphabits == 8)
1007                                                 {
1008                                                         for(y = 0; y < h; ++y)
1009                                                                 for(x = 0; x < w; ++x)
1010                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1011                                                 }
1012                                                 else
1013                                                 {
1014                                                         int alphadiffuse = 8 - alphabits;
1015                                                         for(y = 0; y < h; ++y)
1016                                                                 for(x = 0; x < w; ++x)
1017                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> (8 - alphabits);
1018                                                 }
1019                                         }
1020                                         else
1021                                         {
1022                                                 for(y = 0; y < h; ++y)
1023                                                         for(x = 0; x < w; ++x)
1024                                                                 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1025                                         }
1026                                 }
1027                                 break;
1028                         case DITHER_SIMPLE:
1029                                 {
1030                                         int x, y;
1031                                         int diffuse_r = 0;
1032                                         int diffuse_g = 0;
1033                                         int diffuse_b = 0;
1034                                         int diffuse_a = 0;
1035                                         for(y = 0; y < h; ++y)
1036                                                 for(x = 0; x < w; ++x)
1037                                                 {
1038                                                         out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
1039                                                         out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1040                                                         out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
1041                                                 }
1042                                         if(srccomps == 4)
1043                                         {
1044                                                 if(alphabits == 1)
1045                                                 {
1046                                                         for(y = 0; y < h; ++y)
1047                                                                 for(x = 0; x < w; ++x)
1048                                                                         out[(x + y * w) * 4 + 3] = diffuse1(&diffuse_a, rgba[(x + y * w) * srccomps + 3]);
1049                                                 }
1050                                                 else if(alphabits == 8)
1051                                                 {
1052                                                         for(y = 0; y < h; ++y)
1053                                                                 for(x = 0; x < w; ++x)
1054                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1055                                                 }
1056                                                 else
1057                                                 {
1058                                                         for(y = 0; y < h; ++y)
1059                                                                 for(x = 0; x < w; ++x)
1060                                                                         out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1061                                                 }
1062                                         }
1063                                         else
1064                                         {
1065                                                 for(y = 0; y < h; ++y)
1066                                                         for(x = 0; x < w; ++x)
1067                                                                 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1068                                         }
1069                                 }
1070                                 break;
1071                         case DITHER_FLOYDSTEINBERG:
1072                                 {
1073                                         int x, y;
1074                                         int pw = w+2;
1075                                         int downrow[6*pw];
1076                                         memset(downrow, 0, sizeof(downrow));
1077                                         int *thisrow_r, *thisrow_g, *thisrow_b, *thisrow_a;
1078                                         int *downrow_r, *downrow_g, *downrow_b, *downrow_a;
1079                                         for(y = 0; y < h; ++y)
1080                                         {
1081                                                 thisrow_r = downrow + ((y&1)?3:0) * pw;
1082                                                 downrow_r = downrow + ((y&1)?0:3) * pw;
1083                                                 memset(downrow_r, 0, sizeof(*downrow_r) * (3*pw));
1084                                                 thisrow_g = thisrow_r + pw;
1085                                                 thisrow_b = thisrow_g + pw;
1086                                                 downrow_g = downrow_r + pw;
1087                                                 downrow_b = downrow_g + pw;
1088                                                 for(x = 0; x < w; ++x)
1089                                                 {
1090                                                         out[(x + y * w) * 4 + 2] = floyd(&thisrow_r[x], &downrow_r[x], rgba[(x + y * w) * srccomps + 0], 3);
1091                                                         out[(x + y * w) * 4 + 1] = floyd(&thisrow_g[x], &downrow_g[x], rgba[(x + y * w) * srccomps + 1], 2);
1092                                                         out[(x + y * w) * 4 + 0] = floyd(&thisrow_b[x], &downrow_b[x], rgba[(x + y * w) * srccomps + 2], 3);
1093                                                 }
1094                                         }
1095                                         if(srccomps == 4)
1096                                         {
1097                                                 if(alphabits == 1)
1098                                                 {
1099                                                         for(y = 0; y < h; ++y)
1100                                                         {
1101                                                                 thisrow_a = downrow + (y&1) * pw;
1102                                                                 downrow_a = downrow + !(y&1) * pw;
1103                                                                 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1104                                                                 for(x = 0; x < w; ++x)
1105                                                                         out[(x + y * w) * 4 + 3] = floyd1(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3]);
1106                                                         }
1107                                                 }
1108                                                 else if(alphabits == 8)
1109                                                 {
1110                                                         for(y = 0; y < h; ++y)
1111                                                                 for(x = 0; x < w; ++x)
1112                                                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1113                                                 }
1114                                                 else
1115                                                 {
1116                                                         for(y = 0; y < h; ++y)
1117                                                         {
1118                                                                 thisrow_a = downrow + (y&1) * pw;
1119                                                                 downrow_a = downrow + !(y&1) * pw;
1120                                                                 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1121                                                                 for(x = 0; x < w; ++x)
1122                                                                         out[(x + y * w) * 4 + 3] = floyd(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1123                                                         }
1124                                                 }
1125                                         }
1126                                         else
1127                                         {
1128                                                 for(y = 0; y < h; ++y)
1129                                                         for(x = 0; x < w; ++x)
1130                                                                 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1131                                         }
1132                                 }
1133                                 break;
1134                 }
1135         }
1136
1137         template<int srccomps, int alphabits>
1138         void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, DitherMode dither)
1139         {
1140                 switch(dither)
1141                 {
1142                         case DITHER_NONE:
1143                                 rgb565_image<srccomps, alphabits, DITHER_NONE>(out, rgba, w, h);
1144                                 break;
1145                         default:
1146                         case DITHER_SIMPLE:
1147                                 rgb565_image<srccomps, alphabits, DITHER_SIMPLE>(out, rgba, w, h);
1148                                 break;
1149                         case DITHER_FLOYDSTEINBERG:
1150                                 rgb565_image<srccomps, alphabits, DITHER_FLOYDSTEINBERG>(out, rgba, w, h);
1151                                 break;
1152                 }
1153         }
1154
1155         template<int srccomps>
1156         void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int alphabits, DitherMode dither)
1157         {
1158                 switch(alphabits)
1159                 {
1160                         case 1:
1161                                 rgb565_image<srccomps, 1>(out, rgba, w, h, dither);
1162                                 break;
1163                         case 4:
1164                                 rgb565_image<srccomps, 4>(out, rgba, w, h, dither);
1165                                 break;
1166                         default:
1167                         case 8:
1168                                 rgb565_image<srccomps, 8>(out, rgba, w, h, dither);
1169                                 break;
1170                 }
1171         }
1172 };
1173
1174 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int alphabits, DitherMode dither)
1175 {
1176         switch(srccomps)
1177         {
1178                 case 3:
1179                         rgb565_image<3>(out, rgba, w, h, alphabits, dither);
1180                 case 4:
1181                 default:
1182                         rgb565_image<4>(out, rgba, w, h, alphabits, dither);
1183         }
1184 }