s2tc_algorithm.cpp

   1 /*
   2  * Copyright (C) 2011  Rudolf Polzer   All Rights Reserved.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included
  12  * in all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * RUDOLF POLZER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  */
  21 #define S2TC_LICENSE_IDENTIFIER s2tc_algorithm_license
  22 #include "s2tc_license.h"
  23
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include <vector>
  28
  29 #include "s2tc_algorithm.h"
  30 #include "s2tc_common.h"
  31
  32 namespace
  33 {
  34         typedef struct
  35         {
  36                 signed char r, g, b;
  37         }
  38         color_t;
  39
  40         inline bool operator<(const color_t &a, const color_t &b)
  41         {
  42                 signed char d;
  43                 d = a.r - b.r;
  44                 if(d)
  45                         return d < 0;
  46                 d = a.g - b.g;
  47                 if(d)
  48                         return d < 0;
  49                 d = a.b - b.b;
  50                 return d < 0;
  51         }
        // The sum of 16 color distances must fit in an int,
        // i.e. every single distance must be lower than 2^27.
  54
  55         // shift right, rounded
  56 #define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
  57
  58         inline int color_dist_avg(const color_t &a, const color_t &b)
  59         {
  60                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  61                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  62                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  63                 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
  64         }
  65
  66         inline int color_dist_wavg(const color_t &a, const color_t &b)
  67         {
  68                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  69                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  70                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  71                 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
  72                 // weighted 4:16:1
  73         }
  74
  75         inline int color_dist_yuv(const color_t &a, const color_t &b)
  76         {
  77                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  78                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  79                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  80                 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
  81                 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
  82                 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
  83                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
  84                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
  85                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
  86         }
  87
  88         inline int color_dist_rgb(const color_t &a, const color_t &b)
  89         {
  90                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  91                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  92                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  93                 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
  94                 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
  95                 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
  96                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
  97                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
  98                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
  99         }
 100
 101         inline int color_dist_srgb(const color_t &a, const color_t &b)
 102         {
 103                 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
 104                 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
 105                 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
 106                 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
 107                 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
 108                 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
 109                 int sy = SHRR(y, 3) * SHRR(y, 4);
 110                 int su = SHRR(u, 3) * SHRR(u, 4);
 111                 int sv = SHRR(v, 3) * SHRR(v, 4);
 112                 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
 113                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
 114                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
 115         }
 116
 117         inline int srgb_get_y(const color_t &a)
 118         {
 119                 // convert to linear
 120                 int r = a.r * (int) a.r;
 121                 int g = a.g * (int) a.g;
 122                 int b = a.b * (int) a.b;
 123                 // find luminance
 124                 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
 125                 // square root it (!)
 126                 y = sqrtf(y) + 0.5f; // now in range 0 to 3815
 127                 return y;
 128         }
 129
 130         inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
 131         {
 132                 // get Y
 133                 int ay = srgb_get_y(a);
 134                 int by = srgb_get_y(b);
 135                 // get UV
 136                 int au = a.r * 191 - ay;
 137                 int av = a.b * 191 - ay;
 138                 int bu = b.r * 191 - by;
 139                 int bv = b.b * 191 - by;
 140                 // get differences
 141                 int y = ay - by;
 142                 int u = au - bu;
 143                 int v = av - bv;
 144                 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
 145                 // weight for u: ???
 146                 // weight for v: ???
 147         }
 148
 149         // FIXME this is likely broken
 150         inline int color_dist_lab_srgb(const color_t &a, const color_t &b)
 151         {
 152                 // undo sRGB
 153                 float ar = powf(a.r / 31.0f, 2.4f);
 154                 float ag = powf(a.g / 63.0f, 2.4f);
 155                 float ab = powf(a.b / 31.0f, 2.4f);
 156                 float br = powf(b.r / 31.0f, 2.4f);
 157                 float bg = powf(b.g / 63.0f, 2.4f);
 158                 float bb = powf(b.b / 31.0f, 2.4f);
 159                 // convert to CIE XYZ
 160                 float aX = 0.4124f * ar + 0.3576f * ag + 0.1805f * ab;
 161                 float aY = 0.2126f * ar + 0.7152f * ag + 0.0722f * ab;
 162                 float aZ = 0.0193f * ar + 0.1192f * ag + 0.9505f * ab;
 163                 float bX = 0.4124f * br + 0.3576f * bg + 0.1805f * bb;
 164                 float bY = 0.2126f * br + 0.7152f * bg + 0.0722f * bb;
 165                 float bZ = 0.0193f * br + 0.1192f * bg + 0.9505f * bb;
 166                 // convert to CIE Lab
 167                 float Xn = 0.3127f;
 168                 float Yn = 0.3290f;
 169                 float Zn = 0.3583f;
 170                 float aL = 116 * cbrtf(aY / Yn) - 16;
 171                 float aA = 500 * (cbrtf(aX / Xn) - cbrtf(aY / Yn));
 172                 float aB = 200 * (cbrtf(aY / Yn) - cbrtf(aZ / Zn));
 173                 float bL = 116 * cbrtf(bY / Yn) - 16;
 174                 float bA = 500 * (cbrtf(bX / Xn) - cbrtf(bY / Yn));
 175                 float bB = 200 * (cbrtf(bY / Yn) - cbrtf(bZ / Zn));
 176                 // euclidean distance, but moving weight away from A and B
 177                 return 1000 * ((aL - bL) * (aL - bL) + (aA - bA) * (aA - bA) + (aB - bB) * (aB - bB));
 178         }
 179
 180         inline int color_dist_normalmap(const color_t &a, const color_t &b)
 181         {
 182                 float ca[3], cb[3], n;
 183                 ca[0] = a.r / 31.0f * 2 - 1;
 184                 ca[1] = a.g / 63.0f * 2 - 1;
 185                 ca[2] = a.b / 31.0f * 2 - 1;
 186                 cb[0] = b.r / 31.0f * 2 - 1;
 187                 cb[1] = b.g / 63.0f * 2 - 1;
 188                 cb[2] = b.b / 31.0f * 2 - 1;
 189                 n = ca[0] * ca[0] + ca[1] * ca[1] + ca[2] * ca[2];
 190                 if(n > 0)
 191                 {
 192                         n = 1.0f / sqrtf(n);
 193                         ca[0] *= n;
 194                         ca[1] *= n;
 195                         ca[2] *= n;
 196                 }
 197                 n = cb[0] * cb[0] + cb[1] * cb[1] + cb[2] * cb[2];
 198                 if(n > 0)
 199                 {
 200                         n = 1.0f / sqrtf(n);
 201                         cb[0] *= n;
 202                         cb[1] *= n;
 203                         cb[2] *= n;
 204                 }
 205
 206                 return
 207                         100000 *
 208                         (
 209                                 (cb[0] - ca[0]) * (cb[0] - ca[0])
 210                                 +
 211                                 (cb[1] - ca[1]) * (cb[1] - ca[1])
 212                                 +
 213                                 (cb[2] - ca[2]) * (cb[2] - ca[2])
 214                         )
 215                         ;
 216                 // max value: 1000 * (4 + 4 + 4) = 6000
 217         }
 218
 219         typedef int ColorDistFunc(const color_t &a, const color_t &b);
 220
 221         inline int alpha_dist(unsigned char a, unsigned char b)
 222         {
 223                 return (a - (int) b) * (a - (int) b);
 224         }
 225
 226         template <class T, class F>
 227         // n: input count
 228         // m: total color count (including non-counted inputs)
 229         // m >= n
 230         inline void reduce_colors_inplace(T *c, int n, int m, F dist)
 231         {
 232                 int i, j, k;
 233                 int bestsum = -1;
 234                 int besti = 0;
 235                 int bestj = 1;
 236                 int dists[m][n];
 237                 // first the square
 238                 for(i = 0; i < n; ++i)
 239                 {
 240                         dists[i][i] = 0;
 241                         for(j = i+1; j < n; ++j)
 242                         {
 243                                 int d = dist(c[i], c[j]);
 244                                 dists[i][j] = dists[j][i] = d;
 245                         }
 246                 }
 247                 // then the box
 248                 for(; i < m; ++i)
 249                 {
 250                         for(j = 0; j < n; ++j)
 251                         {
 252                                 int d = dist(c[i], c[j]);
 253                                 dists[i][j] = d;
 254                         }
 255                 }
 256                 for(i = 0; i < m; ++i)
 257                         for(j = i+1; j < m; ++j)
 258                         {
 259                                 int sum = 0;
 260                                 for(k = 0; k < n; ++k)
 261                                 {
 262                                         int di = dists[i][k];
 263                                         int dj = dists[j][k];
 264                                         int m  = min(di, dj);
 265                                         sum += m;
 266                                 }
 267                                 if(bestsum < 0 || sum < bestsum)
 268                                 {
 269                                         bestsum = sum;
 270                                         besti = i;
 271                                         bestj = j;
 272                                 }
 273                         }
 274                 if(besti != 0)
 275                         c[0] = c[besti];
 276                 if(bestj != 1)
 277                         c[1] = c[bestj];
 278         }
 279         template <class T, class F>
 280         inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
 281         {
 282                 int i, j, k;
 283                 int bestsum = -1;
 284                 int besti = 0;
 285                 int bestj = 1;
 286                 int dists[m+2][n];
 287                 // first the square
 288                 for(i = 0; i < n; ++i)
 289                 {
 290                         dists[i][i] = 0;
 291                         for(j = i+1; j < n; ++j)
 292                         {
 293                                 int d = dist(c[i], c[j]);
 294                                 dists[i][j] = dists[j][i] = d;
 295                         }
 296                 }
 297                 // then the box
 298                 for(; i < m; ++i)
 299                 {
 300                         for(j = 0; j < n; ++j)
 301                         {
 302                                 int d = dist(c[i], c[j]);
 303                                 dists[i][j] = d;
 304                         }
 305                 }
 306                 // then the two extra rows
 307                 for(j = 0; j < n; ++j)
 308                 {
 309                         int d = dist(fix0, c[j]);
 310                         dists[m][j] = d;
 311                 }
 312                 for(j = 0; j < n; ++j)
 313                 {
 314                         int d = dist(fix1, c[j]);
 315                         dists[m+1][j] = d;
 316                 }
 317                 for(i = 0; i < m; ++i)
 318                         for(j = i+1; j < m; ++j)
 319                         {
 320                                 int sum = 0;
 321                                 for(k = 0; k < n; ++k)
 322                                 {
 323                                         int di = dists[i][k];
 324                                         int dj = dists[j][k];
 325                                         int d0 = dists[m][k];
 326                                         int d1 = dists[m+1][k];
 327                                         int m  = min(min(di, dj), min(d0, d1));
 328                                         sum += m;
 329                                 }
 330                                 if(bestsum < 0 || sum < bestsum)
 331                                 {
 332                                         bestsum = sum;
 333                                         besti = i;
 334                                         bestj = j;
 335                                 }
 336                         }
 337                 if(besti != 0)
 338                         c[0] = c[besti];
 339                 if(bestj != 1)
 340                         c[1] = c[bestj];
 341         }
 342
 343         enum CompressionMode
 344         {
 345                 MODE_NORMAL,
 346                 MODE_RANDOM,
 347                 MODE_FAST
 348         };
 349
 350         template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
 351         {
 352                 return comp;
 353         }
 354         template<> inline int refine_component_encode<color_dist_srgb>(int comp)
 355         {
 356                 return comp * comp;
 357         }
 358         template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
 359         {
 360                 return comp * comp;
 361         }
 362         template<> inline int refine_component_encode<color_dist_lab_srgb>(int comp)
 363         {
 364                 return comp * comp;
 365         }
 366
 367         template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
 368         {
 369                 return comp;
 370         }
 371         template<> inline int refine_component_decode<color_dist_srgb>(int comp)
 372         {
 373                 return sqrtf(comp) + 0.5f;
 374         }
 375         template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
 376         {
 377                 return sqrtf(comp) + 0.5f;
 378         }
 379         template<> inline int refine_component_decode<color_dist_lab_srgb>(int comp)
 380         {
 381                 return sqrtf(comp) + 0.5f;
 382         }
 383
 384         // these color dist functions ignore color values at alpha 0
 385         template<ColorDistFunc ColorDist> struct alpha_0_is_unimportant
 386         {
 387                 static bool const value = true;
 388         };
 389         template<> struct alpha_0_is_unimportant<color_dist_normalmap>
 390         {
 391                 static bool const value = false;
 392         };
 393
 394         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
 395         inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
 396         {
 397                 color_t c[16 + (mode == MODE_RANDOM ? nrandom : 0)];
 398                 unsigned char ca[16 + (mode == MODE_RANDOM ? nrandom : 0)];
 399                 int n = 0, m = 0;
 400                 int x, y;
 401
 402                 if(mode == MODE_FAST)
 403                 {
 404                         // FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these
 405
 406                         color_t c0 = {0, 0, 0};
 407
                        // dummy values because we don't know whether the first pixel will write
 409                         c[0].r = 31;
 410                         c[0].g = 63;
 411                         c[0].b = 31;
 412                         c[1].r = 0;
 413                         c[1].g = 0;
 414                         c[1].b = 0;
 415                         int dmin = 0x7FFFFFFF;
 416                         int dmax = 0;
 417                         if(dxt == DXT5)
 418                         {
 419                                 ca[0] = rgba[3];
 420                                 ca[1] = ca[0];
 421                         }
 422
 423                         for(x = 0; x < w; ++x)
 424                                 for(y = 0; y < h; ++y)
 425                                 {
 426                                         c[2].r = rgba[(x + y * iw) * 4 + 2];
 427                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
 428                                         c[2].b = rgba[(x + y * iw) * 4 + 0];
 429                                         ca[2]  = rgba[(x + y * iw) * 4 + 3];
 430                                         // MODE_FAST doesn't work for normalmaps, so this works
 431                                         if(!ca[2])
 432                                                 continue;
 433
 434                                         int d = ColorDist(c[2], c0);
 435                                         if(d > dmax)
 436                                         {
 437                                                 dmax = d;
 438                                                 c[1] = c[2];
 439                                         }
 440                                         if(d < dmin)
 441                                         {
 442                                                 dmin = d;
 443                                                 c[0] = c[2];
 444                                         }
 445
 446                                         if(dxt == DXT5)
 447                                         {
 448                                                 if(ca[2] != 255)
 449                                                 {
 450                                                         if(ca[2] > ca[1])
 451                                                                 ca[1] = ca[2];
 452                                                         if(ca[2] < ca[0])
 453                                                                 ca[0] = ca[2];
 454                                                 }
 455                                         }
 456                                 }
 457
 458                         // if ALL pixels were transparent, this won't stop us
 459
 460                         m = n = 2;
 461                 }
 462                 else
 463                 {
 464                         for(x = 0; x < w; ++x)
 465                                 for(y = 0; y < h; ++y)
 466                                 {
 467                                         ca[n]  = rgba[(x + y * iw) * 4 + 3];
 468                                         if(alpha_0_is_unimportant<ColorDist>::value)
 469                                                 if(!ca[n])
 470                                                         continue;
 471                                         c[n].r = rgba[(x + y * iw) * 4 + 2];
 472                                         c[n].g = rgba[(x + y * iw) * 4 + 1];
 473                                         c[n].b = rgba[(x + y * iw) * 4 + 0];
 474                                         ++n;
 475                                 }
 476                         if(n == 0)
 477                         {
 478                                 n = 1;
 479                                 c[0].r = 0;
 480                                 c[0].g = 0;
 481                                 c[0].b = 0;
 482                                 ca[0] = 0;
 483                         }
 484                         m = n;
 485
 486                         if(mode == MODE_RANDOM)
 487                         {
 488                                 color_t mins = c[0];
 489                                 color_t maxs = c[0];
 490                                 unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
 491                                 unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
 492                                 for(x = 1; x < n; ++x)
 493                                 {
 494                                         mins.r = min(mins.r, c[x].r);
 495                                         mins.g = min(mins.g, c[x].g);
 496                                         mins.b = min(mins.b, c[x].b);
 497                                         maxs.r = max(maxs.r, c[x].r);
 498                                         maxs.g = max(maxs.g, c[x].g);
 499                                         maxs.b = max(maxs.b, c[x].b);
 500                                         if(dxt == DXT5)
 501                                         {
 502                                                 mina = min(mina, ca[x]);
 503                                                 maxa = max(maxa, ca[x]);
 504                                         }
 505                                 }
 506                                 color_t len = { maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1 };
 507                                 int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
 508                                 for(x = 0; x < nrandom; ++x)
 509                                 {
 510                                         c[m].r = mins.r + rand() % len.r;
 511                                         c[m].g = mins.g + rand() % len.g;
 512                                         c[m].b = mins.b + rand() % len.b;
 513                                         if(dxt == DXT5)
 514                                                 ca[m] = mina + rand() % lena;
 515                                         ++m;
 516                                 }
 517                         }
 518                         else
 519                         {
 520                                 // hack for last miplevel
 521                                 if(n == 1)
 522                                 {
 523                                         c[1] = c[0];
 524                                         m = n = 2;
 525                                 }
 526                         }
 527
 528                         reduce_colors_inplace(c, n, m, ColorDist);
 529                         if(dxt == DXT5)
 530                                 reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
 531                 }
 532
 533                 if(refine == REFINE_NEVER)
 534                 {
 535                         if(dxt == DXT5)
 536                         {
 537                                 if(ca[1] < ca[0])
 538                                 {
 539                                         // select mode with 6 = 0, 7 = 255
 540                                         ca[2] = ca[0];
 541                                         ca[0] = ca[1];
 542                                         ca[1] = ca[2];
 543                                 }
 544                         }
 545                         if((dxt == DXT1) ? (c[1] < c[0]) : (c[0] < c[1]))
 546                         // DXT1: select mode with 3 = transparent
 547                         // other: don't select this mode
 548                         {
 549                                 c[2] = c[0];
 550                                 c[0] = c[1];
 551                                 c[1] = c[2];
 552                         }
 553                 }
 554
 555                 bool refined;
 556                 do
 557                 {
 558                         int nc0 = 0, na0 = 0, sc0r = 0, sc0g = 0, sc0b = 0, sa0 = 0;
 559                         int nc1 = 0, na1 = 0, sc1r = 0, sc1g = 0, sc1b = 0, sa1 = 0;
 560                         if(refine == REFINE_LOOP)
 561                                 refined = false;
 562
 563                         memset(out, 0, (dxt == DXT1) ? 8 : 16);
 564                         for(x = 0; x < w; ++x)
 565                                 for(y = 0; y < h; ++y)
 566                                 {
 567                                         int pindex = (x+y*4);
 568                                         c[2].r = rgba[(x + y * iw) * 4 + 2];
 569                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
 570                                         c[2].b = rgba[(x + y * iw) * 4 + 0];
 571                                         ca[2]  = rgba[(x + y * iw) * 4 + 3];
 572                                         switch(dxt)
 573                                         {
 574                                                 case DXT5:
 575                                                         {
 576                                                                 bool visible = true;
 577                                                                 int da[4];
 578                                                                 int bitindex = pindex * 3;
 579                                                                 da[0] = alpha_dist(ca[0], ca[2]);
 580                                                                 da[1] = alpha_dist(ca[1], ca[2]);
 581                                                                 da[2] = alpha_dist(0, ca[2]);
 582                                                                 da[3] = alpha_dist(255, ca[2]);
 583                                                                 if(da[2] <= da[0] && da[2] <= da[1] && da[2] <= da[3])
 584                                                                 {
 585                                                                         // 6
 586                                                                         ++bitindex;
 587                                                                         setbit(&out[2], bitindex);
 588                                                                         ++bitindex;
 589                                                                         setbit(&out[2], bitindex);
 590                                                                         if(alpha_0_is_unimportant<ColorDist>::value)
 591                                                                                 visible = false;
 592                                                                 }
 593                                                                 else if(da[3] <= da[0] && da[3] <= da[1])
 594                                                                 {
 595                                                                         // 7
 596                                                                         setbit(&out[2], bitindex);
 597                                                                         ++bitindex;
 598                                                                         setbit(&out[2], bitindex);
 599                                                                         ++bitindex;
 600                                                                         setbit(&out[2], bitindex);
 601                                                                 }
 602                                                                 else if(da[0] <= da[1])
 603                                                                 {
 604                                                                         // 0
 605                                                                         if(refine != REFINE_NEVER)
 606                                                                         {
 607                                                                                 ++na0;
 608                                                                                 sa0 += ca[2];
 609                                                                         }
 610                                                                 }
 611                                                                 else
 612                                                                 {
 613                                                                         // 1
 614                                                                         setbit(&out[2], bitindex);
 615                                                                         if(refine != REFINE_NEVER)
 616                                                                         {
 617                                                                                 ++na1;
 618                                                                                 sa1 += ca[2];
 619                                                                         }
 620                                                                 }
 621                                                                 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
 622                                                                 {
 623                                                                         int bitindex = pindex * 2;
 624                                                                         setbit(&out[12], bitindex);
 625                                                                         if(refine != REFINE_NEVER)
 626                                                                         {
 627                                                                                 if(!alpha_0_is_unimportant<ColorDist>::value || visible)
 628                                                                                 {
 629                                                                                         ++nc1;
 630                                                                                         sc1r += refine_component_encode<ColorDist>(c[2].r);
 631                                                                                         sc1g += refine_component_encode<ColorDist>(c[2].g);
 632                                                                                         sc1b += refine_component_encode<ColorDist>(c[2].b);
 633                                                                                 }
 634                                                                         }
 635                                                                 }
 636                                                                 else
 637                                                                 {
 638                                                                         if(refine != REFINE_NEVER)
 639                                                                         {
 640                                                                                 if(!alpha_0_is_unimportant<ColorDist>::value || visible)
 641                                                                                 {
 642                                                                                         ++nc0;
 643                                                                                         sc0r += refine_component_encode<ColorDist>(c[2].r);
 644                                                                                         sc0g += refine_component_encode<ColorDist>(c[2].g);
 645                                                                                         sc0b += refine_component_encode<ColorDist>(c[2].b);
 646                                                                                 }
 647                                                                         }
 648                                                                 }
 649                                                         }
 650                                                         break;
 651                                                 case DXT3:
 652                                                         {
 653                                                                 int bitindex = pindex * 4;
 654                                                                 setbit(&out[0], bitindex, ca[2]);
 655                                                         }
 656                                                         if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
 657                                                         {
 658                                                                 int bitindex = pindex * 2;
 659                                                                 setbit(&out[12], bitindex);
 660                                                                 if(refine != REFINE_NEVER)
 661                                                                 {
 662                                                                         if(!alpha_0_is_unimportant<ColorDist>::value || ca[2])
 663                                                                         {
 664                                                                                 ++nc1;
 665                                                                                 sc1r += refine_component_encode<ColorDist>(c[2].r);
 666                                                                                 sc1g += refine_component_encode<ColorDist>(c[2].g);
 667                                                                                 sc1b += refine_component_encode<ColorDist>(c[2].b);
 668                                                                         }
 669                                                                 }
 670                                                         }
 671                                                         else
 672                                                         {
 673                                                                 if(refine != REFINE_NEVER)
 674                                                                 {
 675                                                                         if(!alpha_0_is_unimportant<ColorDist>::value || ca[2])
 676                                                                         {
 677                                                                                 ++nc0;
 678                                                                                 sc0r += refine_component_encode<ColorDist>(c[2].r);
 679                                                                                 sc0g += refine_component_encode<ColorDist>(c[2].g);
 680                                                                                 sc0b += refine_component_encode<ColorDist>(c[2].b);
 681                                                                         }
 682                                                                 }
 683                                                         }
 684                                                         break;
 685                                                 case DXT1:
 686                                                         {
 687                                                                 // the normalmap-uses-alpha-0 hack cannot be used here
 688                                                                 int bitindex = pindex * 2;
 689                                                                 if(!ca[2])
 690                                                                         setbit(&out[4], bitindex, 3);
 691                                                                 else if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
 692                                                                 {
 693                                                                         setbit(&out[4], bitindex);
 694                                                                         if(refine != REFINE_NEVER)
 695                                                                         {
 696                                                                                 ++nc1;
 697                                                                                 sc1r += refine_component_encode<ColorDist>(c[2].r);
 698                                                                                 sc1g += refine_component_encode<ColorDist>(c[2].g);
 699                                                                                 sc1b += refine_component_encode<ColorDist>(c[2].b);
 700                                                                         }
 701                                                                 }
 702                                                                 else
 703                                                                 {
 704                                                                         if(refine != REFINE_NEVER)
 705                                                                         {
 706                                                                                 ++nc0;
 707                                                                                 sc0r += refine_component_encode<ColorDist>(c[2].r);
 708                                                                                 sc0g += refine_component_encode<ColorDist>(c[2].g);
 709                                                                                 sc0b += refine_component_encode<ColorDist>(c[2].b);
 710                                                                         }
 711                                                                 }
 712                                                         }
 713                                                         break;
 714                                         }
 715                                 }
 716                         if(refine != REFINE_NEVER)
 717                         {
 718                                 // REFINEMENT: trick from libtxc_dxtn: reassign the colors to an average of the colors encoded with that value
 719
 720                                 if(dxt == DXT5)
 721                                 {
 722                                         if(na0)
 723                                                 ca[0] = (2 * sa0 + na0) / (2 * na0);
 724                                         if(na1)
 725                                                 ca[1] = (2 * sa1 + na1) / (2 * na1);
 726                                 }
 727                                 if(refine == REFINE_CHECK || refine == REFINE_LOOP)
 728                                 {
 729                                         c[2] = c[0];
 730                                         c[3] = c[1];
 731                                 }
 732                                 if(nc0)
 733                                 {
 734                                         c[0].r = refine_component_decode<ColorDist>((2 * sc0r + nc0) / (2 * nc0));
 735                                         c[0].g = refine_component_decode<ColorDist>((2 * sc0g + nc0) / (2 * nc0));
 736                                         c[0].b = refine_component_decode<ColorDist>((2 * sc0b + nc0) / (2 * nc0));
 737                                 }
 738                                 if(nc1)
 739                                 {
 740                                         c[1].r = refine_component_decode<ColorDist>((2 * sc1r + nc1) / (2 * nc1));
 741                                         c[1].g = refine_component_decode<ColorDist>((2 * sc1g + nc1) / (2 * nc1));
 742                                         c[1].b = refine_component_decode<ColorDist>((2 * sc1b + nc1) / (2 * nc1));
 743                                 }
 744
 745                                 if(refine == REFINE_CHECK || refine == REFINE_LOOP)
 746                                 {
 747                                         int score_01 = 0;
 748                                         int score_23 = 0;
 749                                         for(x = 0; x < w; ++x)
 750                                                 for(y = 0; y < h; ++y)
 751                                                 {
 752                                                         int pindex = (x+y*4);
 753                                                         c[4].r = rgba[(x + y * iw) * 4 + 2];
 754                                                         c[4].g = rgba[(x + y * iw) * 4 + 1];
 755                                                         c[4].b = rgba[(x + y * iw) * 4 + 0];
 756                                                         if(alpha_0_is_unimportant<ColorDist>::value || dxt == DXT1) // in DXT1, alpha 0 pixels are always skipped!
 757                                                         {
 758                                                                 if(dxt == DXT5)
 759                                                                 {
 760                                                                         // check ENCODED alpha
 761                                                                         int bitindex_0 = pindex * 3;
 762                                                                         int bitindex_1 = bitindex_0 + 2;
 763                                                                         if(!testbit(&out[2], bitindex_0))
 764                                                                                 if(testbit(&out[2], bitindex_1))
 765                                                                                         continue;
 766                                                                 }
 767                                                                 else
 768                                                                 {
 769                                                                         // check ORIGINAL alpha (DXT1 and DXT3 preserve it)
 770                                                                         ca[4] = rgba[(x + y * iw) * 4 + 3];
 771                                                                         if(!ca[4])
 772                                                                                 continue;
 773                                                                 }
 774                                                         }
 775                                                         int bitindex = pindex * 2;
 776                                                         if(refine == REFINE_CHECK)
 777                                                         {
 778                                                                 if(testbit(&out[(dxt == DXT1 ? 4 : 12)], bitindex))
 779                                                                 {
 780                                                                         // we picked an 1
 781                                                                         score_01 += ColorDist(c[1], c[4]);
 782                                                                         score_23 += ColorDist(c[3], c[4]);
 783                                                                 }
 784                                                                 else
 785                                                                 {
 786                                                                         // we picked a 0
 787                                                                         score_01 += ColorDist(c[0], c[4]);
 788                                                                         score_23 += ColorDist(c[2], c[4]);
 789                                                                 }
 790                                                         }
 791                                                         else if(refine == REFINE_LOOP)
 792                                                         {
 793                                                                 if(testbit(&out[(dxt == DXT1 ? 4 : 12)], bitindex))
 794                                                                 {
 795                                                                         // we picked an 1
 796                                                                         score_23 += ColorDist(c[3], c[4]);
 797                                                                 }
 798                                                                 else
 799                                                                 {
 800                                                                         // we picked a 0
 801                                                                         score_23 += ColorDist(c[2], c[4]);
 802                                                                 }
 803                                                                 // we WILL run another loop iteration, if score_01 wins
 804                                                                 score_01 += min(ColorDist(c[0], c[4]), ColorDist(c[1], c[4]));
 805                                                         }
 806                                                 }
 807
 808                                         if(score_23 <= score_01)
 809                                         {
 810                                                 // refinement was BAD
 811                                                 c[0] = c[2];
 812                                                 c[1] = c[3];
 813                                         }
 814                                         else if(refine == REFINE_LOOP)
 815                                                 refined = true;
 816
 817                                         // alpha refinement is always good and doesn't
 818                                         // need to be checked because alpha is linear
 819
 820                                         // when looping, though, checking the
 821                                         // alpha COULD help, but we usually
 822                                         // loop twice anyway as refinement
 823                                         // usually helps
 824                                 }
 825                         }
 826                 }
 827                 while(refine == REFINE_LOOP && refined);
 828
 829                 if(refine != REFINE_NEVER)
 830                 {
 831                         if(dxt == DXT5)
 832                         {
 833                                 if(ca[1] < ca[0])
 834                                 {
 835                                         ca[2] = ca[0];
 836                                         ca[0] = ca[1];
 837                                         ca[1] = ca[2];
 838                                         // swap the alphas
 839                                         for(int pindex = 0; pindex < 16; ++pindex)
 840                                         {
 841                                                 int bitindex_set = pindex * 3;
 842                                                 int bitindex_test = bitindex_set + 2;
 843                                                 if(!testbit(&out[2], bitindex_test))
 844                                                         xorbit(&out[2], bitindex_set);
 845                                         }
 846                                 }
 847                         }
 848                         if((dxt == DXT1) ? (c[1] < c[0]) : (c[0] < c[1]))
 849                         // DXT1: select mode with 3 = transparent
 850                         // other: don't select this mode
 851                         {
 852                                 c[2] = c[0];
 853                                 c[0] = c[1];
 854                                 c[1] = c[2];
 855                                 // swap the colors
 856                                 if(dxt == DXT1)
 857                                 {
 858                                         out[4] ^= 0x55 & ~(out[4] >> 1);
 859                                         out[5] ^= 0x55 & ~(out[5] >> 1);
 860                                         out[6] ^= 0x55 & ~(out[6] >> 1);
 861                                         out[7] ^= 0x55 & ~(out[7] >> 1);
 862                                 }
 863                                 else
 864                                 {
 865                                         out[12] ^= 0x55 & ~(out[12] >> 1);
 866                                         out[13] ^= 0x55 & ~(out[13] >> 1);
 867                                         out[14] ^= 0x55 & ~(out[14] >> 1);
 868                                         out[15] ^= 0x55 & ~(out[15] >> 1);
 869                                 }
 870                         }
 871                 }
 872
 873                 switch(dxt)
 874                 {
 875                         case DXT5:
 876                                 out[0] = ca[0];
 877                                 out[1] = ca[1];
 878                         case DXT3:
 879                                 out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
 880                                 out[9] = (c[0].r << 3) | (c[0].g >> 3);
 881                                 out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
 882                                 out[11] = (c[1].r << 3) | (c[1].g >> 3);
 883                                 break;
 884                         case DXT1:
 885                                 out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
 886                                 out[1] = (c[0].r << 3) | (c[0].g >> 3);
 887                                 out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
 888                                 out[3] = (c[1].r << 3) | (c[1].g >> 3);
 889                                 break;
 890                 }
 891         }
 892
 893         // these color dist functions do not need the refinement check, as they always improve the situation
 894         template<ColorDistFunc ColorDist> struct need_refine_check
 895         {
 896                 static const bool value = true;
 897         };
 898         template<> struct need_refine_check<color_dist_avg>
 899         {
 900                 static const bool value = false;
 901         };
 902         template<> struct need_refine_check<color_dist_wavg>
 903         {
 904                 static const bool value = false;
 905         };
 906
 907         // compile time dispatch magic
 908         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
 909         inline s2tc_encode_block_func_t s2tc_encode_block_func(RefinementMode refine)
 910         {
 911                 switch(refine)
 912                 {
 913                         case REFINE_NEVER:
 914                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_NEVER>;
 915                         case REFINE_LOOP:
 916                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_LOOP>;
 917                         case REFINE_CHECK:
 918                                 if(need_refine_check<ColorDist>::value)
 919                                         return s2tc_encode_block<dxt, ColorDist, mode, REFINE_CHECK>;
 920                         default:
 921                         case REFINE_ALWAYS:
 922                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_ALWAYS>;
 923                 }
 924         }
 925
 926         // these color dist functions do not need the refinement check, as they always improve the situation
 927         template<ColorDistFunc ColorDist> struct supports_fast
 928         {
 929                 static const bool value = true;
 930         };
 931         template<> struct need_refine_check<color_dist_normalmap>
 932         {
 933                 static const bool value = false;
 934         };
 935
 936         template<DxtMode dxt, ColorDistFunc ColorDist>
 937         inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, RefinementMode refine)
 938         {
 939                 if(nrandom > 0)
 940                         return s2tc_encode_block_func<dxt, ColorDist, MODE_RANDOM>(refine);
 941                 else if(!supports_fast<ColorDist>::value || nrandom == 0) // MODE_FAST not supported for normalmaps, sorry
 942                         return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
 943                 else
 944                         return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
 945         }
 946
 947         template<ColorDistFunc ColorDist>
 948         inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, RefinementMode refine)
 949         {
 950                 switch(dxt)
 951                 {
 952                         case DXT1:
 953                                 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
 954                                 break;
 955                         case DXT3:
 956                                 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
 957                                 break;
 958                         default:
 959                         case DXT5:
 960                                 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
 961                                 break;
 962                 }
 963         }
 964 };
 965
 966 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, RefinementMode refine)
 967 {
 968         switch(cd)
 969         {
 970                 case RGB:
 971                         return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
 972                         break;
 973                 case YUV:
 974                         return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
 975                         break;
 976                 case SRGB:
 977                         return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
 978                         break;
 979                 case SRGB_MIXED:
 980                         return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
 981                         break;
 982                 case LAB:
 983                         return s2tc_encode_block_func<color_dist_lab_srgb>(dxt, nrandom, refine);
 984                         break;
 985                 case AVG:
 986                         return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
 987                         break;
 988                 default:
 989                 case WAVG:
 990                         return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
 991                         break;
 992                 case NORMALMAP:
 993                         return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
 994                         break;
 995         }
 996 }
 997
 998 namespace
 999 {
1000         inline int diffuse(int *diff, int src, int shift)
1001         {
1002                 int maxval = (1 << (8 - shift)) - 1;
1003                 src += *diff;
1004                 int ret = max(0, min(src >> shift, maxval));
1005                 // simulate decoding ("loop filter")
1006                 int loop = (ret << shift) | (ret >> (8 - 2 * shift));
1007                 *diff = src - loop;
1008                 return ret;
1009         }
1010         inline int diffuse1(int *diff, int src)
1011         {
1012                 src += *diff;
1013                 int ret = (src >= 128);
1014                 int loop = ret ? 255 : 0;
1015                 *diff = src - loop;
1016                 return ret;
1017         }
1018 };
1019
1020 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int bgr, int alphabits)
1021 {
1022         int x, y;
1023         int diffuse_r = 0;
1024         int diffuse_g = 0;
1025         int diffuse_b = 0;
1026         int diffuse_a = 0;
1027         if(bgr)
1028         {
1029                 for(y = 0; y < h; ++y)
1030                         for(x = 0; x < w; ++x)
1031                         {
1032                                 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 2], 3);
1033                                 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1034                                 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 0], 3);
1035                         }
1036         }
1037         else
1038         {
1039                 for(y = 0; y < h; ++y)
1040                         for(x = 0; x < w; ++x)
1041                         {
1042                                 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
1043                                 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1044                                 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
1045                         }
1046         }
1047         if(srccomps == 4)
1048         {
1049                 if(alphabits == 1)
1050                 {
1051                         for(y = 0; y < h; ++y)
1052                                 for(x = 0; x < w; ++x)
1053                                         out[(x + y * w) * 4 + 3] = diffuse1(&diffuse_a, rgba[(x + y * w) * srccomps + 3]);
1054                 }
1055                 else if(alphabits == 8)
1056                 {
1057                         for(y = 0; y < h; ++y)
1058                                 for(x = 0; x < w; ++x)
1059                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1060                 }
1061                 else
1062                 {
1063                         int alphadiffuse = 8 - alphabits;
1064                         for(y = 0; y < h; ++y)
1065                                 for(x = 0; x < w; ++x)
1066                                         out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], alphadiffuse);
1067                 }
1068         }
1069         else
1070         {
1071                 int alpharange = (1 << alphabits) - 1;
1072                 for(y = 0; y < h; ++y)
1073                         for(x = 0; x < w; ++x)
1074                                 out[(x + y * w) * 4 + 3] = alpharange;
1075         }
1076 }
1077