s2tc_compressor.cpp

   1 #include <math.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4
   5 #include "s2tc_compressor.h"
   6 #include "s2tc_common.h"
   7
   8 namespace
   9 {
  10         typedef struct
  11         {
  12                 signed char r, g, b;
  13         }
  14         color_t;
  15
  16         inline bool operator<(const color_t &a, const color_t &b)
  17         {
  18                 signed char d;
  19                 d = a.r - b.r;
  20                 if(d)
  21                         return d < 0;
  22                 d = a.g - b.g;
  23                 if(d)
  24                         return d < 0;
  25                 d = a.b - b.b;
  26                 return d < 0;
  27         }
  28         // 16 differences must fit in int
  29         // i.e. a difference must be lower than 2^27
  30
  31         // shift right, rounded
  32 #define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
  33
  34         inline int color_dist_avg(const color_t &a, const color_t &b)
  35         {
  36                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  37                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  38                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  39                 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
  40         }
  41
  42         inline int color_dist_wavg(const color_t &a, const color_t &b)
  43         {
  44                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  45                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  46                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  47                 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
  48                 // weighted 4:16:1
  49         }
  50
  51         inline int color_dist_yuv(const color_t &a, const color_t &b)
  52         {
  53                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  54                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  55                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  56                 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
  57                 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
  58                 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
  59                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
  60                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
  61                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
  62         }
  63
  64         inline int color_dist_rgb(const color_t &a, const color_t &b)
  65         {
  66                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  67                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  68                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  69                 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
  70                 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
  71                 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
  72                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
  73                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
  74                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
  75         }
  76
  77         inline int color_dist_srgb(const color_t &a, const color_t &b)
  78         {
  79                 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
  80                 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
  81                 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
  82                 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
  83                 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
  84                 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
  85                 int sy = SHRR(y, 3) * SHRR(y, 4);
  86                 int su = SHRR(u, 3) * SHRR(u, 4);
  87                 int sv = SHRR(v, 3) * SHRR(v, 4);
  88                 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
  89                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
  90                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
  91         }
  92
  93         inline int srgb_get_y(const color_t &a)
  94         {
  95                 // convert to linear
  96                 int r = a.r * (int) a.r;
  97                 int g = a.g * (int) a.g;
  98                 int b = a.b * (int) a.b;
  99                 // find luminance
 100                 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
 101                 // square root it (!)
 102                 y = sqrt(y); // now in range 0 to 3815
 103                 return y;
 104         }
 105
 106         inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
 107         {
 108                 // get Y
 109                 int ay = srgb_get_y(a);
 110                 int by = srgb_get_y(b);
 111                 // get UV
 112                 int au = a.r * 191 - ay;
 113                 int av = a.b * 191 - ay;
 114                 int bu = b.r * 191 - by;
 115                 int bv = b.b * 191 - by;
 116                 // get differences
 117                 int y = ay - by;
 118                 int u = au - bu;
 119                 int v = av - bv;
 120                 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
 121                 // weight for u: ???
 122                 // weight for v: ???
 123         }
 124
 125         // FIXME this is likely broken
 126         inline int color_dist_lab_srgb(const color_t &a, const color_t &b)
 127         {
 128                 // undo sRGB
 129                 float ar = powf(a.r / 31.0f, 2.4f);
 130                 float ag = powf(a.g / 63.0f, 2.4f);
 131                 float ab = powf(a.b / 31.0f, 2.4f);
 132                 float br = powf(b.r / 31.0f, 2.4f);
 133                 float bg = powf(b.g / 63.0f, 2.4f);
 134                 float bb = powf(b.b / 31.0f, 2.4f);
 135                 // convert to CIE XYZ
 136                 float aX = 0.4124f * ar + 0.3576f * ag + 0.1805f * ab;
 137                 float aY = 0.2126f * ar + 0.7152f * ag + 0.0722f * ab;
 138                 float aZ = 0.0193f * ar + 0.1192f * ag + 0.9505f * ab;
 139                 float bX = 0.4124f * br + 0.3576f * bg + 0.1805f * bb;
 140                 float bY = 0.2126f * br + 0.7152f * bg + 0.0722f * bb;
 141                 float bZ = 0.0193f * br + 0.1192f * bg + 0.9505f * bb;
 142                 // convert to CIE Lab
 143                 float Xn = 0.3127f;
 144                 float Yn = 0.3290f;
 145                 float Zn = 0.3583f;
 146                 float aL = 116 * cbrtf(aY / Yn) - 16;
 147                 float aA = 500 * (cbrtf(aX / Xn) - cbrtf(aY / Yn));
 148                 float aB = 200 * (cbrtf(aY / Yn) - cbrtf(aZ / Zn));
 149                 float bL = 116 * cbrtf(bY / Yn) - 16;
 150                 float bA = 500 * (cbrtf(bX / Xn) - cbrtf(bY / Yn));
 151                 float bB = 200 * (cbrtf(bY / Yn) - cbrtf(bZ / Zn));
 152                 // euclidean distance, but moving weight away from A and B
 153                 return 1000 * ((aL - bL) * (aL - bL) + (aA - bA) * (aA - bA) + (aB - bB) * (aB - bB));
 154         }
 155
 156         inline int color_dist_normalmap(const color_t &a, const color_t &b)
 157         {
 158                 float ca[3], cb[3];
 159                 ca[0] = a.r / 31.0 * 2 - 1;
 160                 ca[1] = a.g / 63.0 * 2 - 1;
 161                 ca[2] = a.b / 31.0 * 2 - 1;
 162                 cb[0] = b.r / 31.0 * 2 - 1;
 163                 cb[1] = b.g / 63.0 * 2 - 1;
 164                 cb[2] = b.b / 31.0 * 2 - 1;
 165
 166                 return
 167                         500 *
 168                         (
 169                                 (cb[0] - ca[0]) * (cb[0] - ca[0])
 170                                 +
 171                                 (cb[1] - ca[1]) * (cb[1] - ca[1])
 172                                 +
 173                                 (cb[2] - ca[2]) * (cb[2] - ca[2])
 174                         )
 175                         ;
 176                 // max value: 500 * (4 + 4 + 4) = 6000
 177         }
 178
 179         typedef int ColorDistFunc(const color_t &a, const color_t &b);
 180
 181         inline int alpha_dist(unsigned char a, unsigned char b)
 182         {
 183                 return (a - (int) b) * (a - (int) b);
 184         }
 185
 186         template <class T, class F>
 187         // n: input count
 188         // m: total color count (including non-counted inputs)
 189         // m >= n
 190         inline void reduce_colors_inplace(T *c, int n, int m, F dist)
 191         {
 192                 int i, j, k;
 193                 int bestsum = -1;
 194                 int besti = 0;
 195                 int bestj = 1;
 196                 int dists[m][n];
 197                 // first the square
 198                 for(i = 0; i < n; ++i)
 199                 {
 200                         dists[i][i] = 0;
 201                         for(j = i+1; j < n; ++j)
 202                         {
 203                                 int d = dist(c[i], c[j]);
 204                                 dists[i][j] = dists[j][i] = d;
 205                         }
 206                 }
 207                 // then the box
 208                 for(; i < m; ++i)
 209                 {
 210                         for(j = 0; j < n; ++j)
 211                         {
 212                                 int d = dist(c[i], c[j]);
 213                                 dists[i][j] = d;
 214                         }
 215                 }
 216                 for(i = 0; i < m; ++i)
 217                         for(j = i+1; j < m; ++j)
 218                         {
 219                                 int sum = 0;
 220                                 for(k = 0; k < n; ++k)
 221                                 {
 222                                         int di = dists[i][k];
 223                                         int dj = dists[j][k];
 224                                         int m  = min(di, dj);
 225                                         sum += m;
 226                                 }
 227                                 if(bestsum < 0 || sum < bestsum)
 228                                 {
 229                                         bestsum = sum;
 230                                         besti = i;
 231                                         bestj = j;
 232                                 }
 233                         }
 234                 if(besti != 0)
 235                         c[0] = c[besti];
 236                 if(bestj != 1)
 237                         c[1] = c[bestj];
 238         }
 239         template <class T, class F>
 240         inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
 241         {
 242                 int i, j, k;
 243                 int bestsum = -1;
 244                 int besti = 0;
 245                 int bestj = 1;
 246                 int dists[m+2][n];
 247                 // first the square
 248                 for(i = 0; i < n; ++i)
 249                 {
 250                         dists[i][i] = 0;
 251                         for(j = i+1; j < n; ++j)
 252                         {
 253                                 int d = dist(c[i], c[j]);
 254                                 dists[i][j] = dists[j][i] = d;
 255                         }
 256                 }
 257                 // then the box
 258                 for(; i < m; ++i)
 259                 {
 260                         for(j = 0; j < n; ++j)
 261                         {
 262                                 int d = dist(c[i], c[j]);
 263                                 dists[i][j] = d;
 264                         }
 265                 }
 266                 // then the two extra rows
 267                 for(j = 0; j < n; ++j)
 268                 {
 269                         int d = dist(fix0, c[j]);
 270                         dists[m][j] = d;
 271                 }
 272                 for(j = 0; j < n; ++j)
 273                 {
 274                         int d = dist(fix1, c[j]);
 275                         dists[m+1][j] = d;
 276                 }
 277                 for(i = 0; i < m; ++i)
 278                         for(j = i+1; j < m; ++j)
 279                         {
 280                                 int sum = 0;
 281                                 for(k = 0; k < n; ++k)
 282                                 {
 283                                         int di = dists[i][k];
 284                                         int dj = dists[j][k];
 285                                         int d0 = dists[m][k];
 286                                         int d1 = dists[m+1][k];
 287                                         int m  = min(min(di, dj), min(d0, d1));
 288                                         sum += m;
 289                                 }
 290                                 if(bestsum < 0 || sum < bestsum)
 291                                 {
 292                                         bestsum = sum;
 293                                         besti = i;
 294                                         bestj = j;
 295                                 }
 296                         }
 297                 if(besti != 0)
 298                         c[0] = c[besti];
 299                 if(bestj != 1)
 300                         c[1] = c[bestj];
 301         }
 302
 303         enum CompressionMode
 304         {
 305                 MODE_NORMAL,
 306                 MODE_RANDOM,
 307                 MODE_FAST
 308         };
 309
 310         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, bool refine>
 311         inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
 312         {
 313                 color_t c[16 + (mode == MODE_RANDOM ? nrandom : 0)];
 314
 315                 unsigned char ca[16];
 316                 int n = 0, m = 0;
 317                 int x, y;
 318
 319                 if(mode == MODE_FAST)
 320                 {
 321                         color_t c0 = {0, 0, 0};
 322
 323                         c[0].r = rgba[2];
 324                         c[0].g = rgba[1];
 325                         c[0].b = rgba[0];
 326                         c[1] = c[0];
 327                         int dmin = ColorDist(c[0], c0);
 328                         int dmax = dmin;
 329                         if(dxt == DXT5)
 330                         {
 331                                 ca[0] = rgba[3];
 332                                 ca[1] = ca[0];
 333                         }
 334
 335                         for(x = 0; x < w; ++x)
 336                                 for(y = !x; y < h; ++y)
 337                                 {
 338                                         c[2].r = rgba[(x + y * iw) * 4 + 2];
 339                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
 340                                         c[2].b = rgba[(x + y * iw) * 4 + 0];
 341
 342                                         int d = ColorDist(c[2], c0);
 343                                         if(d > dmax)
 344                                         {
 345                                                 dmax = d;
 346                                                 c[1] = c[2];
 347                                         }
 348                                         if(d < dmin)
 349                                         {
 350                                                 dmin = d;
 351                                                 c[0] = c[2];
 352                                         }
 353
 354                                         if(dxt == DXT5)
 355                                         {
 356                                                 ca[2]  = rgba[(x + y * iw) * 4 + 3];
 357                                                 if(ca[2] > ca[1])
 358                                                         ca[1] = ca[2];
 359                                                 if(ca[2] < ca[0])
 360                                                         ca[0] = ca[2];
 361                                         }
 362                                 }
 363
 364                         m = n = 2;
 365                 }
 366                 else
 367                 {
 368                         for(x = 0; x < w; ++x)
 369                                 for(y = 0; y < h; ++y)
 370                                 {
 371                                         c[n].r = rgba[(x + y * iw) * 4 + 2];
 372                                         c[n].g = rgba[(x + y * iw) * 4 + 1];
 373                                         c[n].b = rgba[(x + y * iw) * 4 + 0];
 374                                         if(dxt == DXT5)
 375                                                 ca[n]  = rgba[(x + y * iw) * 4 + 3];
 376                                         ++n;
 377                                 }
 378                         m = n;
 379
 380                         if(mode == MODE_RANDOM)
 381                         {
 382                                 color_t mins = c[0];
 383                                 color_t maxs = c[0];
 384                                 for(x = 1; x < n; ++x)
 385                                 {
 386                                         mins.r = min(mins.r, c[x].r);
 387                                         mins.g = min(mins.g, c[x].g);
 388                                         mins.b = min(mins.b, c[x].b);
 389                                         maxs.r = max(maxs.r, c[x].r);
 390                                         maxs.g = max(maxs.g, c[x].g);
 391                                         maxs.b = max(maxs.b, c[x].b);
 392                                 }
 393                                 color_t len = { maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1 };
 394                                 for(x = 0; x < nrandom; ++x)
 395                                 {
 396                                         c[m].r = mins.r + rand() % len.r;
 397                                         c[m].g = mins.g + rand() % len.g;
 398                                         c[m].b = mins.b + rand() % len.b;
 399                                         ++m;
 400                                 }
 401                         }
 402                         else
 403                         {
 404                                 // hack for last miplevel
 405                                 if(n == 1)
 406                                 {
 407                                         c[1] = c[0];
 408                                         m = n = 2;
 409                                 }
 410                         }
 411
 412                         reduce_colors_inplace(c, n, m, ColorDist);
 413                         if(dxt == DXT5)
 414                                 reduce_colors_inplace_2fixpoints(ca, n, n, alpha_dist, (unsigned char) 0, (unsigned char) 255);
 415                 }
 416
 417                 if(!refine)
 418                 {
 419                         if(dxt == DXT5)
 420                         {
 421                                 if(ca[1] < ca[0])
 422                                 {
 423                                         ca[2] = ca[0];
 424                                         ca[0] = ca[1];
 425                                         ca[1] = ca[2];
 426                                 }
 427                         }
 428                         if(c[1] < c[0])
 429                         {
 430                                 c[2] = c[0];
 431                                 c[0] = c[1];
 432                                 c[1] = c[2];
 433                         }
 434                 }
 435
 436                 int nc0 = 0, na0 = 0, sc0r = 0, sc0g = 0, sc0b = 0, sa0 = 0;
 437                 int nc1 = 0, na1 = 0, sc1r = 0, sc1g = 0, sc1b = 0, sa1 = 0;
 438
 439                 memset(out, 0, (dxt == DXT1) ? 8 : 16);
 440                 for(x = 0; x < w; ++x)
 441                         for(y = 0; y < h; ++y)
 442                         {
 443                                 int pindex = (x+y*4);
 444                                 c[2].r = rgba[(x + y * iw) * 4 + 2];
 445                                 c[2].g = rgba[(x + y * iw) * 4 + 1];
 446                                 c[2].b = rgba[(x + y * iw) * 4 + 0];
 447                                 ca[2]  = rgba[(x + y * iw) * 4 + 3];
 448                                 switch(dxt)
 449                                 {
 450                                         case DXT5:
 451                                                 {
 452                                                         int da[4];
 453                                                         int bitindex = pindex * 3;
 454                                                         da[0] = alpha_dist(ca[0], ca[2]);
 455                                                         da[1] = alpha_dist(ca[1], ca[2]);
 456                                                         da[2] = alpha_dist(0, ca[2]);
 457                                                         da[3] = alpha_dist(255, ca[2]);
 458                                                         if(da[2] <= da[0] && da[2] <= da[1] && da[2] <= da[3])
 459                                                         {
 460                                                                 // 6
 461                                                                 ++bitindex;
 462                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
 463                                                                 ++bitindex;
 464                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
 465                                                         }
 466                                                         else if(da[3] <= da[0] && da[3] <= da[1])
 467                                                         {
 468                                                                 // 7
 469                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
 470                                                                 ++bitindex;
 471                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
 472                                                                 ++bitindex;
 473                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
 474                                                         }
 475                                                         else if(da[0] <= da[1])
 476                                                         {
 477                                                                 // 0
 478                                                                 if(refine)
 479                                                                 {
 480                                                                         ++na0;
 481                                                                         sa0 += ca[2];
 482                                                                 }
 483                                                         }
 484                                                         else
 485                                                         {
 486                                                                 // 1
 487                                                                 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
 488                                                                 if(refine)
 489                                                                 {
 490                                                                         ++na1;
 491                                                                         sa1 += ca[2];
 492                                                                 }
 493                                                         }
 494                                                 }
 495                                                 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
 496                                                 {
 497                                                         int bitindex = pindex * 2;
 498                                                         out[bitindex / 8 + 12] |= (1 << (bitindex % 8));
 499                                                         if(refine)
 500                                                         {
 501                                                                 ++nc1;
 502                                                                 sc1r += c[2].r;
 503                                                                 sc1g += c[2].g;
 504                                                                 sc1b += c[2].b;
 505                                                         }
 506                                                 }
 507                                                 else
 508                                                 {
 509                                                         if(refine)
 510                                                         {
 511                                                                 ++nc0;
 512                                                                 sc0r += c[2].r;
 513                                                                 sc0g += c[2].g;
 514                                                                 sc0b += c[2].b;
 515                                                         }
 516                                                 }
 517                                                 break;
 518                                         case DXT3:
 519                                                 {
 520                                                         int bitindex = pindex * 4;
 521                                                         out[bitindex / 8 + 0] |= (ca[2] << (bitindex % 8));
 522                                                 }
 523                                                 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
 524                                                 {
 525                                                         int bitindex = pindex * 2;
 526                                                         out[bitindex / 8 + 12] |= (1 << (bitindex % 8));
 527                                                         if(refine)
 528                                                         {
 529                                                                 ++nc1;
 530                                                                 sc1r += c[2].r;
 531                                                                 sc1g += c[2].g;
 532                                                                 sc1b += c[2].b;
 533                                                         }
 534                                                 }
 535                                                 else
 536                                                 {
 537                                                         if(refine)
 538                                                         {
 539                                                                 ++nc0;
 540                                                                 sc0r += c[2].r;
 541                                                                 sc0g += c[2].g;
 542                                                                 sc0b += c[2].b;
 543                                                         }
 544                                                 }
 545                                                 break;
 546                                         case DXT1:
 547                                                 {
 548                                                         int bitindex = pindex * 2;
 549                                                         if(!ca[2])
 550                                                                 out[bitindex / 8 + 4] |= (3 << (bitindex % 8));
 551                                                         else if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
 552                                                         {
 553                                                                 out[bitindex / 8 + 4] |= (1 << (bitindex % 8));
 554                                                                 if(refine)
 555                                                                 {
 556                                                                         ++nc1;
 557                                                                         sc1r += c[2].r;
 558                                                                         sc1g += c[2].g;
 559                                                                         sc1b += c[2].b;
 560                                                                 }
 561                                                         }
 562                                                         else
 563                                                         {
 564                                                                 if(refine)
 565                                                                 {
 566                                                                         ++nc0;
 567                                                                         sc0r += c[2].r;
 568                                                                         sc0g += c[2].g;
 569                                                                         sc0b += c[2].b;
 570                                                                 }
 571                                                         }
 572                                                 }
 573                                                 break;
 574                                 }
 575                         }
 576                 if(refine)
 577                 {
 578                         if(dxt == DXT5)
 579                         {
 580                                 if(na0)
 581                                         ca[0] = (2 * sa0 + na0) / (2 * na0);
 582                                 if(na1)
 583                                         ca[1] = (2 * sa1 + na1) / (2 * na1);
 584                         }
 585                         if(nc0)
 586                         {
 587                                 c[0].r = (2 * sc0r + nc0) / (2 * nc0);
 588                                 c[0].g = (2 * sc0g + nc0) / (2 * nc0);
 589                                 c[0].b = (2 * sc0b + nc0) / (2 * nc0);
 590                         }
 591                         if(nc1)
 592                         {
 593                                 c[1].r = (2 * sc1r + nc1) / (2 * nc1);
 594                                 c[1].g = (2 * sc1g + nc1) / (2 * nc1);
 595                                 c[1].b = (2 * sc1b + nc1) / (2 * nc1);
 596                         }
 597
 598                         if(dxt == DXT5)
 599                         {
 600                                 if(ca[1] < ca[0])
 601                                 {
 602                                         ca[2] = ca[0];
 603                                         ca[0] = ca[1];
 604                                         ca[1] = ca[2];
 605                                         // swap the alphas
 606                                         for(int pindex = 0; pindex < 16; ++pindex)
 607                                         {
 608                                                 int bitindex_set = pindex * 3;
 609                                                 int bitindex_test = bitindex_set + 3;
 610                                                 if(!(out[bitindex_test / 8] & (1 << (bitindex_test % 8))))
 611                                                         out[bitindex_set / 8] ^= (1 << (bitindex_set % 8));
 612                                         }
 613                                 }
 614                         }
 615                         if(c[1] < c[0])
 616                         {
 617                                 c[2] = c[0];
 618                                 c[0] = c[1];
 619                                 c[1] = c[2];
 620                                 // swap the colors
 621                                 if(dxt == DXT1)
 622                                 {
 623                                         out[4] ^= 0x55 & ~(out[4] >> 1);
 624                                         out[5] ^= 0x55 & ~(out[5] >> 1);
 625                                         out[6] ^= 0x55 & ~(out[6] >> 1);
 626                                         out[7] ^= 0x55 & ~(out[7] >> 1);
 627                                 }
 628                                 else
 629                                 {
 630                                         out[12] ^= 0x55 & ~(out[12] >> 1);
 631                                         out[13] ^= 0x55 & ~(out[13] >> 1);
 632                                         out[14] ^= 0x55 & ~(out[14] >> 1);
 633                                         out[15] ^= 0x55 & ~(out[15] >> 1);
 634                                 }
 635                         }
 636                 }
 637                 switch(dxt)
 638                 {
 639                         case DXT5:
 640                                 out[0] = ca[0];
 641                                 out[1] = ca[1];
 642                         case DXT3:
 643                                 out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
 644                                 out[9] = (c[0].r << 3) | (c[0].g >> 3);
 645                                 out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
 646                                 out[11] = (c[1].r << 3) | (c[1].g >> 3);
 647                                 break;
 648                         case DXT1:
 649                                 out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
 650                                 out[1] = (c[0].r << 3) | (c[0].g >> 3);
 651                                 out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
 652                                 out[3] = (c[1].r << 3) | (c[1].g >> 3);
 653                                 break;
 654                 }
 655         }
 656
 657         // compile time dispatch magic
 658         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
 659         inline s2tc_encode_block_func_t s2tc_encode_block_func(bool refine)
 660         {
 661                 if(refine)
 662                         return s2tc_encode_block<dxt, ColorDist, mode, true>;
 663                 else
 664                         return s2tc_encode_block<dxt, ColorDist, mode, false>;
 665         }
 666
 667         template<DxtMode dxt, ColorDistFunc ColorDist>
 668         inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, bool refine)
 669         {
 670                 if(nrandom > 0)
 671                         return s2tc_encode_block_func<dxt, ColorDist, MODE_RANDOM>(refine);
 672                 else if(nrandom == 0)
 673                         return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
 674                 else
 675                         return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
 676         }
 677
 678         template<ColorDistFunc ColorDist>
 679         inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, bool refine)
 680         {
 681                 switch(dxt)
 682                 {
 683                         case DXT1:
 684                                 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
 685                                 break;
 686                         case DXT3:
 687                                 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
 688                                 break;
 689                         default:
 690                         case DXT5:
 691                                 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
 692                                 break;
 693                 }
 694         }
 695 };
 696
 697 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, bool refine)
 698 {
 699         switch(cd)
 700         {
 701                 case RGB:
 702                         return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
 703                         break;
 704                 case YUV:
 705                         return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
 706                         break;
 707                 case SRGB:
 708                         return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
 709                         break;
 710                 case SRGB_MIXED:
 711                         return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
 712                         break;
 713                 case LAB:
 714                         return s2tc_encode_block_func<color_dist_lab_srgb>(dxt, nrandom, refine);
 715                         break;
 716                 case AVG:
 717                         return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
 718                         break;
 719                 default:
 720                 case WAVG:
 721                         return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
 722                         break;
 723                 case NORMALMAP:
 724                         return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
 725                         break;
 726         }
 727 }
 728
 729 namespace
 730 {
 731         inline int diffuse(int *diff, int src, int shift)
 732         {
 733                 int maxval = (1 << (8 - shift)) - 1;
 734                 src += *diff;
 735                 int ret = max(0, min(src >> shift, maxval));
 736                 // simulate decoding ("loop filter")
 737                 int loop = (ret << shift) | (ret >> (8 - 2 * shift));
 738                 *diff = src - loop;
 739                 return ret;
 740         }
 741 };
 742
 743 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int bgr, int alphabits)
 744 {
 745         int x, y;
 746         int diffuse_r = 0;
 747         int diffuse_g = 0;
 748         int diffuse_b = 0;
 749         int diffuse_a = 0;
 750         if(bgr)
 751         {
 752                 for(y = 0; y < h; ++y)
 753                         for(x = 0; x < w; ++x)
 754                         {
 755                                 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 2], 3);
 756                                 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
 757                                 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 0], 3);
 758                         }
 759         }
 760         else
 761         {
 762                 for(y = 0; y < h; ++y)
 763                         for(x = 0; x < w; ++x)
 764                         {
 765                                 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
 766                                 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
 767                                 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
 768                         }
 769         }
 770         if(srccomps == 4)
 771         {
 772                 int alphadiffuse = 8 - alphabits;
 773                 for(y = 0; y < h; ++y)
 774                         for(x = 0; x < w; ++x)
 775                                 out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], alphadiffuse);
 776         }
 777         else
 778         {
 779                 int alpharange = (1 << alphabits) - 1;
 780                 for(y = 0; y < h; ++y)
 781                         for(x = 0; x < w; ++x)
 782                                 out[(x + y * w) * 4 + 3] = alpharange;
 783         }
 784 }
 785