s2tc_compressor.cpp

   1 #include <math.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdio.h>
   5
   6 #include "s2tc_compressor.h"
   7 #include "s2tc_common.h"
   8
   9 namespace
  10 {
  11         typedef struct
  12         {
  13                 signed char r, g, b;
  14         }
  15         color_t;
  16
  17         inline bool operator<(const color_t &a, const color_t &b)
  18         {
  19                 signed char d;
  20                 d = a.r - b.r;
  21                 if(d)
  22                         return d < 0;
  23                 d = a.g - b.g;
  24                 if(d)
  25                         return d < 0;
  26                 d = a.b - b.b;
  27                 return d < 0;
  28         }
        // The sum of 16 color distances must fit in an int,
        // i.e. a single distance must stay below 2^27.
  31
        // shift right by n bits with rounding: half of 2^n is added before the shift (requires n >= 1)
  33 #define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
  34
  35         inline int color_dist_avg(const color_t &a, const color_t &b)
  36         {
  37                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  38                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  39                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  40                 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
  41         }
  42
  43         inline int color_dist_wavg(const color_t &a, const color_t &b)
  44         {
  45                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  46                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  47                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  48                 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
  49                 // weighted 4:16:1
  50         }
  51
  52         inline int color_dist_yuv(const color_t &a, const color_t &b)
  53         {
  54                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  55                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  56                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  57                 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
  58                 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
  59                 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
  60                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
  61                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
  62                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
  63         }
  64
  65         inline int color_dist_rgb(const color_t &a, const color_t &b)
  66         {
  67                 int dr = a.r - b.r; // multiplier: 31 (-1..1)
  68                 int dg = a.g - b.g; // multiplier: 63 (-1..1)
  69                 int db = a.b - b.b; // multiplier: 31 (-1..1)
  70                 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
  71                 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
  72                 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
  73                 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
  74                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
  75                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
  76         }
  77
  78         inline int color_dist_srgb(const color_t &a, const color_t &b)
  79         {
  80                 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
  81                 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
  82                 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
  83                 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
  84                 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
  85                 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
  86                 int sy = SHRR(y, 3) * SHRR(y, 4);
  87                 int su = SHRR(u, 3) * SHRR(u, 4);
  88                 int sv = SHRR(v, 3) * SHRR(v, 4);
  89                 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
  90                 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
  91                 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
  92         }
  93
  94         inline int srgb_get_y(const color_t &a)
  95         {
  96                 // convert to linear
  97                 int r = a.r * (int) a.r;
  98                 int g = a.g * (int) a.g;
  99                 int b = a.b * (int) a.b;
 100                 // find luminance
 101                 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
 102                 // square root it (!)
 103                 y = sqrtf(y) + 0.5f; // now in range 0 to 3815
 104                 return y;
 105         }
 106
 107         inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
 108         {
 109                 // get Y
 110                 int ay = srgb_get_y(a);
 111                 int by = srgb_get_y(b);
 112                 // get UV
 113                 int au = a.r * 191 - ay;
 114                 int av = a.b * 191 - ay;
 115                 int bu = b.r * 191 - by;
 116                 int bv = b.b * 191 - by;
 117                 // get differences
 118                 int y = ay - by;
 119                 int u = au - bu;
 120                 int v = av - bv;
 121                 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
 122                 // weight for u: ???
 123                 // weight for v: ???
 124         }
 125
 126         // FIXME this is likely broken
 127         inline int color_dist_lab_srgb(const color_t &a, const color_t &b)
 128         {
 129                 // undo sRGB
 130                 float ar = powf(a.r / 31.0f, 2.4f);
 131                 float ag = powf(a.g / 63.0f, 2.4f);
 132                 float ab = powf(a.b / 31.0f, 2.4f);
 133                 float br = powf(b.r / 31.0f, 2.4f);
 134                 float bg = powf(b.g / 63.0f, 2.4f);
 135                 float bb = powf(b.b / 31.0f, 2.4f);
 136                 // convert to CIE XYZ
 137                 float aX = 0.4124f * ar + 0.3576f * ag + 0.1805f * ab;
 138                 float aY = 0.2126f * ar + 0.7152f * ag + 0.0722f * ab;
 139                 float aZ = 0.0193f * ar + 0.1192f * ag + 0.9505f * ab;
 140                 float bX = 0.4124f * br + 0.3576f * bg + 0.1805f * bb;
 141                 float bY = 0.2126f * br + 0.7152f * bg + 0.0722f * bb;
 142                 float bZ = 0.0193f * br + 0.1192f * bg + 0.9505f * bb;
 143                 // convert to CIE Lab
 144                 float Xn = 0.3127f;
 145                 float Yn = 0.3290f;
 146                 float Zn = 0.3583f;
 147                 float aL = 116 * cbrtf(aY / Yn) - 16;
 148                 float aA = 500 * (cbrtf(aX / Xn) - cbrtf(aY / Yn));
 149                 float aB = 200 * (cbrtf(aY / Yn) - cbrtf(aZ / Zn));
 150                 float bL = 116 * cbrtf(bY / Yn) - 16;
 151                 float bA = 500 * (cbrtf(bX / Xn) - cbrtf(bY / Yn));
 152                 float bB = 200 * (cbrtf(bY / Yn) - cbrtf(bZ / Zn));
 153                 // euclidean distance, but moving weight away from A and B
 154                 return 1000 * ((aL - bL) * (aL - bL) + (aA - bA) * (aA - bA) + (aB - bB) * (aB - bB));
 155         }
 156
 157         inline int color_dist_normalmap(const color_t &a, const color_t &b)
 158         {
 159                 float ca[3], cb[3], n;
 160                 ca[0] = a.r / 31.0f * 2 - 1;
 161                 ca[1] = a.g / 63.0f * 2 - 1;
 162                 ca[2] = a.b / 31.0f * 2 - 1;
 163                 cb[0] = b.r / 31.0f * 2 - 1;
 164                 cb[1] = b.g / 63.0f * 2 - 1;
 165                 cb[2] = b.b / 31.0f * 2 - 1;
 166                 n = ca[0] * ca[0] + ca[1] * ca[1] + ca[2] * ca[2];
 167                 if(n > 0)
 168                 {
 169                         n = 1.0f / sqrtf(n);
 170                         ca[0] *= n;
 171                         ca[1] *= n;
 172                         ca[2] *= n;
 173                 }
 174                 n = cb[0] * cb[0] + cb[1] * cb[1] + cb[2] * cb[2];
 175                 if(n > 0)
 176                 {
 177                         n = 1.0f / sqrtf(n);
 178                         cb[0] *= n;
 179                         cb[1] *= n;
 180                         cb[2] *= n;
 181                 }
 182
 183                 return
 184                         100000 *
 185                         (
 186                                 (cb[0] - ca[0]) * (cb[0] - ca[0])
 187                                 +
 188                                 (cb[1] - ca[1]) * (cb[1] - ca[1])
 189                                 +
 190                                 (cb[2] - ca[2]) * (cb[2] - ca[2])
 191                         )
 192                         ;
 193                 // max value: 1000 * (4 + 4 + 4) = 6000
 194         }
 195
 196         typedef int ColorDistFunc(const color_t &a, const color_t &b);
 197
 198         inline int alpha_dist(unsigned char a, unsigned char b)
 199         {
 200                 return (a - (int) b) * (a - (int) b);
 201         }
 202
 203         template <class T, class F>
 204         // n: input count
 205         // m: total color count (including non-counted inputs)
 206         // m >= n
 207         inline void reduce_colors_inplace(T *c, int n, int m, F dist)
 208         {
 209                 int i, j, k;
 210                 int bestsum = -1;
 211                 int besti = 0;
 212                 int bestj = 1;
 213                 int dists[m][n];
 214                 // first the square
 215                 for(i = 0; i < n; ++i)
 216                 {
 217                         dists[i][i] = 0;
 218                         for(j = i+1; j < n; ++j)
 219                         {
 220                                 int d = dist(c[i], c[j]);
 221                                 dists[i][j] = dists[j][i] = d;
 222                         }
 223                 }
 224                 // then the box
 225                 for(; i < m; ++i)
 226                 {
 227                         for(j = 0; j < n; ++j)
 228                         {
 229                                 int d = dist(c[i], c[j]);
 230                                 dists[i][j] = d;
 231                         }
 232                 }
 233                 for(i = 0; i < m; ++i)
 234                         for(j = i+1; j < m; ++j)
 235                         {
 236                                 int sum = 0;
 237                                 for(k = 0; k < n; ++k)
 238                                 {
 239                                         int di = dists[i][k];
 240                                         int dj = dists[j][k];
 241                                         int m  = min(di, dj);
 242                                         sum += m;
 243                                 }
 244                                 if(bestsum < 0 || sum < bestsum)
 245                                 {
 246                                         bestsum = sum;
 247                                         besti = i;
 248                                         bestj = j;
 249                                 }
 250                         }
 251                 if(besti != 0)
 252                         c[0] = c[besti];
 253                 if(bestj != 1)
 254                         c[1] = c[bestj];
 255         }
 256         template <class T, class F>
 257         inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
 258         {
 259                 int i, j, k;
 260                 int bestsum = -1;
 261                 int besti = 0;
 262                 int bestj = 1;
 263                 int dists[m+2][n];
 264                 // first the square
 265                 for(i = 0; i < n; ++i)
 266                 {
 267                         dists[i][i] = 0;
 268                         for(j = i+1; j < n; ++j)
 269                         {
 270                                 int d = dist(c[i], c[j]);
 271                                 dists[i][j] = dists[j][i] = d;
 272                         }
 273                 }
 274                 // then the box
 275                 for(; i < m; ++i)
 276                 {
 277                         for(j = 0; j < n; ++j)
 278                         {
 279                                 int d = dist(c[i], c[j]);
 280                                 dists[i][j] = d;
 281                         }
 282                 }
 283                 // then the two extra rows
 284                 for(j = 0; j < n; ++j)
 285                 {
 286                         int d = dist(fix0, c[j]);
 287                         dists[m][j] = d;
 288                 }
 289                 for(j = 0; j < n; ++j)
 290                 {
 291                         int d = dist(fix1, c[j]);
 292                         dists[m+1][j] = d;
 293                 }
 294                 for(i = 0; i < m; ++i)
 295                         for(j = i+1; j < m; ++j)
 296                         {
 297                                 int sum = 0;
 298                                 for(k = 0; k < n; ++k)
 299                                 {
 300                                         int di = dists[i][k];
 301                                         int dj = dists[j][k];
 302                                         int d0 = dists[m][k];
 303                                         int d1 = dists[m+1][k];
 304                                         int m  = min(min(di, dj), min(d0, d1));
 305                                         sum += m;
 306                                 }
 307                                 if(bestsum < 0 || sum < bestsum)
 308                                 {
 309                                         bestsum = sum;
 310                                         besti = i;
 311                                         bestj = j;
 312                                 }
 313                         }
 314                 if(besti != 0)
 315                         c[0] = c[besti];
 316                 if(bestj != 1)
 317                         c[1] = c[bestj];
 318         }
 319
 320         enum CompressionMode
 321         {
 322                 MODE_NORMAL,
 323                 MODE_RANDOM,
 324                 MODE_FAST
 325         };
 326
 327         template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
 328         {
 329                 return comp;
 330         }
 331         template<> inline int refine_component_encode<color_dist_srgb>(int comp)
 332         {
 333                 return comp * comp;
 334         }
 335         template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
 336         {
 337                 return comp * comp;
 338         }
 339         template<> inline int refine_component_encode<color_dist_lab_srgb>(int comp)
 340         {
 341                 return comp * comp;
 342         }
 343
 344         template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
 345         {
 346                 return comp;
 347         }
 348         template<> inline int refine_component_decode<color_dist_srgb>(int comp)
 349         {
 350                 return sqrtf(comp) + 0.5f;
 351         }
 352         template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
 353         {
 354                 return sqrtf(comp) + 0.5f;
 355         }
 356         template<> inline int refine_component_decode<color_dist_lab_srgb>(int comp)
 357         {
 358                 return sqrtf(comp) + 0.5f;
 359         }
 360
 361         // these color dist functions ignore color values at alpha 0
 362         template<ColorDistFunc ColorDist> struct alpha_0_is_unimportant
 363         {
 364                 static bool const value = true;
 365         };
 366         template<> struct alpha_0_is_unimportant<color_dist_normalmap>
 367         {
 368                 static bool const value = false;
 369         };
 370
 371         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
 372         inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
 373         {
 374                 color_t c[16 + (mode == MODE_RANDOM ? nrandom : 0)];
 375                 unsigned char ca[16 + (mode == MODE_RANDOM ? nrandom : 0)];
 376                 int n = 0, m = 0;
 377                 int x, y;
 378
 379                 if(mode == MODE_FAST)
 380                 {
 381                         // FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these
 382
 383                         color_t c0 = {0, 0, 0};
 384
                        // dummy values because we don't know whether the first pixel will write
 386                         c[0].r = 31;
 387                         c[0].g = 63;
 388                         c[0].b = 31;
 389                         c[1].r = 0;
 390                         c[1].g = 0;
 391                         c[1].b = 0;
 392                         int dmin = 0x7FFFFFFF;
 393                         int dmax = 0;
 394                         if(dxt == DXT5)
 395                         {
 396                                 ca[0] = rgba[3];
 397                                 ca[1] = ca[0];
 398                         }
 399
 400                         for(x = 0; x < w; ++x)
 401                                 for(y = 0; y < h; ++y)
 402                                 {
 403                                         c[2].r = rgba[(x + y * iw) * 4 + 2];
 404                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
 405                                         c[2].b = rgba[(x + y * iw) * 4 + 0];
 406                                         ca[2]  = rgba[(x + y * iw) * 4 + 3];
 407                                         // MODE_FAST doesn't work for normalmaps, so this works
 408                                         if(!ca[2])
 409                                                 continue;
 410
 411                                         int d = ColorDist(c[2], c0);
 412                                         if(d > dmax)
 413                                         {
 414                                                 dmax = d;
 415                                                 c[1] = c[2];
 416                                         }
 417                                         if(d < dmin)
 418                                         {
 419                                                 dmin = d;
 420                                                 c[0] = c[2];
 421                                         }
 422
 423                                         if(dxt == DXT5)
 424                                         {
 425                                                 if(ca[2] != 255)
 426                                                 {
 427                                                         if(ca[2] > ca[1])
 428                                                                 ca[1] = ca[2];
 429                                                         if(ca[2] < ca[0])
 430                                                                 ca[0] = ca[2];
 431                                                 }
 432                                         }
 433                                 }
 434
 435                         // if ALL pixels were transparent, this won't stop us
 436
 437                         m = n = 2;
 438                 }
 439                 else
 440                 {
 441                         for(x = 0; x < w; ++x)
 442                                 for(y = 0; y < h; ++y)
 443                                 {
 444                                         ca[n]  = rgba[(x + y * iw) * 4 + 3];
 445                                         if(alpha_0_is_unimportant<ColorDist>::value)
 446                                                 if(!ca[n])
 447                                                         continue;
 448                                         c[n].r = rgba[(x + y * iw) * 4 + 2];
 449                                         c[n].g = rgba[(x + y * iw) * 4 + 1];
 450                                         c[n].b = rgba[(x + y * iw) * 4 + 0];
 451                                         ++n;
 452                                 }
 453                         if(n == 0)
 454                         {
 455                                 n = 1;
 456                                 c[0].r = 0;
 457                                 c[0].g = 0;
 458                                 c[0].b = 0;
 459                                 ca[0] = 0;
 460                         }
 461                         m = n;
 462
 463                         if(mode == MODE_RANDOM)
 464                         {
 465                                 color_t mins = c[0];
 466                                 color_t maxs = c[0];
 467                                 unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
 468                                 unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
 469                                 for(x = 1; x < n; ++x)
 470                                 {
 471                                         mins.r = min(mins.r, c[x].r);
 472                                         mins.g = min(mins.g, c[x].g);
 473                                         mins.b = min(mins.b, c[x].b);
 474                                         maxs.r = max(maxs.r, c[x].r);
 475                                         maxs.g = max(maxs.g, c[x].g);
 476                                         maxs.b = max(maxs.b, c[x].b);
 477                                         if(dxt == DXT5)
 478                                         {
 479                                                 mina = min(mina, ca[x]);
 480                                                 maxa = max(maxa, ca[x]);
 481                                         }
 482                                 }
 483                                 color_t len = { maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1 };
 484                                 int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
 485                                 for(x = 0; x < nrandom; ++x)
 486                                 {
 487                                         c[m].r = mins.r + rand() % len.r;
 488                                         c[m].g = mins.g + rand() % len.g;
 489                                         c[m].b = mins.b + rand() % len.b;
 490                                         if(dxt == DXT5)
 491                                                 ca[m] = mina + rand() % lena;
 492                                         ++m;
 493                                 }
 494                         }
 495                         else
 496                         {
 497                                 // hack for last miplevel
 498                                 if(n == 1)
 499                                 {
 500                                         c[1] = c[0];
 501                                         m = n = 2;
 502                                 }
 503                         }
 504
 505                         reduce_colors_inplace(c, n, m, ColorDist);
 506                         if(dxt == DXT5)
 507                                 reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
 508                 }
 509
 510                 if(refine == REFINE_NEVER)
 511                 {
 512                         if(dxt == DXT5)
 513                         {
 514                                 if(ca[1] < ca[0])
 515                                 {
 516                                         // select mode with 6 = 0, 7 = 255
 517                                         ca[2] = ca[0];
 518                                         ca[0] = ca[1];
 519                                         ca[1] = ca[2];
 520                                 }
 521                         }
 522                         if((dxt == DXT1) ? (c[1] < c[0]) : (c[0] < c[1]))
 523                         // DXT1: select mode with 3 = transparent
 524                         // other: don't select this mode
 525                         {
 526                                 c[2] = c[0];
 527                                 c[0] = c[1];
 528                                 c[1] = c[2];
 529                         }
 530                 }
 531
 532                 bool refined;
 533                 do
 534                 {
 535                         int nc0 = 0, na0 = 0, sc0r = 0, sc0g = 0, sc0b = 0, sa0 = 0;
 536                         int nc1 = 0, na1 = 0, sc1r = 0, sc1g = 0, sc1b = 0, sa1 = 0;
 537                         if(refine == REFINE_LOOP)
 538                                 refined = false;
 539
 540                         memset(out, 0, (dxt == DXT1) ? 8 : 16);
 541                         for(x = 0; x < w; ++x)
 542                                 for(y = 0; y < h; ++y)
 543                                 {
 544                                         int pindex = (x+y*4);
 545                                         c[2].r = rgba[(x + y * iw) * 4 + 2];
 546                                         c[2].g = rgba[(x + y * iw) * 4 + 1];
 547                                         c[2].b = rgba[(x + y * iw) * 4 + 0];
 548                                         ca[2]  = rgba[(x + y * iw) * 4 + 3];
 549                                         switch(dxt)
 550                                         {
 551                                                 case DXT5:
 552                                                         {
 553                                                                 bool visible = true;
 554                                                                 int da[4];
 555                                                                 int bitindex = pindex * 3;
 556                                                                 da[0] = alpha_dist(ca[0], ca[2]);
 557                                                                 da[1] = alpha_dist(ca[1], ca[2]);
 558                                                                 da[2] = alpha_dist(0, ca[2]);
 559                                                                 da[3] = alpha_dist(255, ca[2]);
 560                                                                 if(da[2] <= da[0] && da[2] <= da[1] && da[2] <= da[3])
 561                                                                 {
 562                                                                         // 6
 563                                                                         ++bitindex;
 564                                                                         setbit(&out[2], bitindex);
 565                                                                         ++bitindex;
 566                                                                         setbit(&out[2], bitindex);
 567                                                                         if(alpha_0_is_unimportant<ColorDist>::value)
 568                                                                                 visible = false;
 569                                                                 }
 570                                                                 else if(da[3] <= da[0] && da[3] <= da[1])
 571                                                                 {
 572                                                                         // 7
 573                                                                         setbit(&out[2], bitindex);
 574                                                                         ++bitindex;
 575                                                                         setbit(&out[2], bitindex);
 576                                                                         ++bitindex;
 577                                                                         setbit(&out[2], bitindex);
 578                                                                 }
 579                                                                 else if(da[0] <= da[1])
 580                                                                 {
 581                                                                         // 0
 582                                                                         if(refine != REFINE_NEVER)
 583                                                                         {
 584                                                                                 ++na0;
 585                                                                                 sa0 += ca[2];
 586                                                                         }
 587                                                                 }
 588                                                                 else
 589                                                                 {
 590                                                                         // 1
 591                                                                         setbit(&out[2], bitindex);
 592                                                                         if(refine != REFINE_NEVER)
 593                                                                         {
 594                                                                                 ++na1;
 595                                                                                 sa1 += ca[2];
 596                                                                         }
 597                                                                 }
 598                                                                 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
 599                                                                 {
 600                                                                         int bitindex = pindex * 2;
 601                                                                         setbit(&out[12], bitindex);
 602                                                                         if(refine != REFINE_NEVER)
 603                                                                         {
 604                                                                                 if(!alpha_0_is_unimportant<ColorDist>::value || visible)
 605                                                                                 {
 606                                                                                         ++nc1;
 607                                                                                         sc1r += refine_component_encode<ColorDist>(c[2].r);
 608                                                                                         sc1g += refine_component_encode<ColorDist>(c[2].g);
 609                                                                                         sc1b += refine_component_encode<ColorDist>(c[2].b);
 610                                                                                 }
 611                                                                         }
 612                                                                 }
 613                                                                 else
 614                                                                 {
 615                                                                         if(refine != REFINE_NEVER)
 616                                                                         {
 617                                                                                 if(!alpha_0_is_unimportant<ColorDist>::value || visible)
 618                                                                                 {
 619                                                                                         ++nc0;
 620                                                                                         sc0r += refine_component_encode<ColorDist>(c[2].r);
 621                                                                                         sc0g += refine_component_encode<ColorDist>(c[2].g);
 622                                                                                         sc0b += refine_component_encode<ColorDist>(c[2].b);
 623                                                                                 }
 624                                                                         }
 625                                                                 }
 626                                                         }
 627                                                         break;
 628                                                 case DXT3:
 629                                                         {
 630                                                                 int bitindex = pindex * 4;
 631                                                                 setbit(&out[0], bitindex, ca[2]);
 632                                                         }
 633                                                         if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
 634                                                         {
 635                                                                 int bitindex = pindex * 2;
 636                                                                 setbit(&out[12], bitindex);
 637                                                                 if(refine != REFINE_NEVER)
 638                                                                 {
 639                                                                         if(!alpha_0_is_unimportant<ColorDist>::value || ca[2])
 640                                                                         {
 641                                                                                 ++nc1;
 642                                                                                 sc1r += refine_component_encode<ColorDist>(c[2].r);
 643                                                                                 sc1g += refine_component_encode<ColorDist>(c[2].g);
 644                                                                                 sc1b += refine_component_encode<ColorDist>(c[2].b);
 645                                                                         }
 646                                                                 }
 647                                                         }
 648                                                         else
 649                                                         {
 650                                                                 if(refine != REFINE_NEVER)
 651                                                                 {
 652                                                                         if(!alpha_0_is_unimportant<ColorDist>::value || ca[2])
 653                                                                         {
 654                                                                                 ++nc0;
 655                                                                                 sc0r += refine_component_encode<ColorDist>(c[2].r);
 656                                                                                 sc0g += refine_component_encode<ColorDist>(c[2].g);
 657                                                                                 sc0b += refine_component_encode<ColorDist>(c[2].b);
 658                                                                         }
 659                                                                 }
 660                                                         }
 661                                                         break;
 662                                                 case DXT1:
 663                                                         {
 664                                                                 // the normalmap-uses-alpha-0 hack cannot be used here
 665                                                                 int bitindex = pindex * 2;
 666                                                                 if(!ca[2])
 667                                                                         setbit(&out[4], bitindex, 3);
 668                                                                 else if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
 669                                                                 {
 670                                                                         setbit(&out[4], bitindex);
 671                                                                         if(refine != REFINE_NEVER)
 672                                                                         {
 673                                                                                 ++nc1;
 674                                                                                 sc1r += refine_component_encode<ColorDist>(c[2].r);
 675                                                                                 sc1g += refine_component_encode<ColorDist>(c[2].g);
 676                                                                                 sc1b += refine_component_encode<ColorDist>(c[2].b);
 677                                                                         }
 678                                                                 }
 679                                                                 else
 680                                                                 {
 681                                                                         if(refine != REFINE_NEVER)
 682                                                                         {
 683                                                                                 ++nc0;
 684                                                                                 sc0r += refine_component_encode<ColorDist>(c[2].r);
 685                                                                                 sc0g += refine_component_encode<ColorDist>(c[2].g);
 686                                                                                 sc0b += refine_component_encode<ColorDist>(c[2].b);
 687                                                                         }
 688                                                                 }
 689                                                         }
 690                                                         break;
 691                                         }
 692                                 }
 693                         if(refine != REFINE_NEVER)
 694                         {
 695                                 // REFINEMENT: trick from libtxc_dxtn: reassign the colors to an average of the colors encoded with that value
 696
 697                                 if(dxt == DXT5)
 698                                 {
 699                                         if(na0)
 700                                                 ca[0] = (2 * sa0 + na0) / (2 * na0);
 701                                         if(na1)
 702                                                 ca[1] = (2 * sa1 + na1) / (2 * na1);
 703                                 }
 704                                 if(refine == REFINE_CHECK || refine == REFINE_LOOP)
 705                                 {
 706                                         c[2] = c[0];
 707                                         c[3] = c[1];
 708                                 }
 709                                 if(nc0)
 710                                 {
 711                                         c[0].r = refine_component_decode<ColorDist>((2 * sc0r + nc0) / (2 * nc0));
 712                                         c[0].g = refine_component_decode<ColorDist>((2 * sc0g + nc0) / (2 * nc0));
 713                                         c[0].b = refine_component_decode<ColorDist>((2 * sc0b + nc0) / (2 * nc0));
 714                                 }
 715                                 if(nc1)
 716                                 {
 717                                         c[1].r = refine_component_decode<ColorDist>((2 * sc1r + nc1) / (2 * nc1));
 718                                         c[1].g = refine_component_decode<ColorDist>((2 * sc1g + nc1) / (2 * nc1));
 719                                         c[1].b = refine_component_decode<ColorDist>((2 * sc1b + nc1) / (2 * nc1));
 720                                 }
 721
 722                                 if(refine == REFINE_CHECK || refine == REFINE_LOOP)
 723                                 {
 724                                         int score_01 = 0;
 725                                         int score_23 = 0;
 726                                         for(x = 0; x < w; ++x)
 727                                                 for(y = 0; y < h; ++y)
 728                                                 {
 729                                                         int pindex = (x+y*4);
 730                                                         c[4].r = rgba[(x + y * iw) * 4 + 2];
 731                                                         c[4].g = rgba[(x + y * iw) * 4 + 1];
 732                                                         c[4].b = rgba[(x + y * iw) * 4 + 0];
 733                                                         if(!alpha_0_is_unimportant<ColorDist>::value)
 734                                                         {
 735                                                                 if(dxt == DXT5)
 736                                                                 {
 737                                                                         // check ENCODED alpha
 738                                                                         int bitindex_0 = pindex * 3;
 739                                                                         int bitindex_1 = bitindex_0 + 2;
 740                                                                         if(!testbit(&out[2], bitindex_0))
 741                                                                                 if(testbit(&out[2], bitindex_1))
 742                                                                                         continue;
 743                                                                 }
 744                                                                 else
 745                                                                 {
 746                                                                         // check ORIGINAL alpha (DXT1 and DXT3 preserve it)
 747                                                                         ca[4] = rgba[(x + y * iw) * 4 + 3];
 748                                                                         if(!ca[4])
 749                                                                                 continue;
 750                                                                 }
 751                                                         }
 752                                                         int bitindex = pindex * 2;
 753                                                         if(refine == REFINE_CHECK)
 754                                                         {
 755                                                                 if(testbit(&out[(dxt == DXT1 ? 4 : 12)], bitindex))
 756                                                                 {
 757                                                                         // we picked an 1
 758                                                                         score_01 += ColorDist(c[1], c[4]);
 759                                                                         score_23 += ColorDist(c[3], c[4]);
 760                                                                 }
 761                                                                 else
 762                                                                 {
 763                                                                         // we picked a 0
 764                                                                         score_01 += ColorDist(c[0], c[4]);
 765                                                                         score_23 += ColorDist(c[2], c[4]);
 766                                                                 }
 767                                                         }
 768                                                         else if(refine == REFINE_LOOP)
 769                                                         {
 770                                                                 if(testbit(&out[(dxt == DXT1 ? 4 : 12)], bitindex))
 771                                                                 {
 772                                                                         // we picked an 1
 773                                                                         score_23 += ColorDist(c[3], c[4]);
 774                                                                 }
 775                                                                 else
 776                                                                 {
 777                                                                         // we picked a 0
 778                                                                         score_23 += ColorDist(c[2], c[4]);
 779                                                                 }
 780                                                                 // we WILL run another loop iteration, if score_01 wins
 781                                                                 score_01 += min(ColorDist(c[0], c[4]), ColorDist(c[1], c[4]));
 782                                                         }
 783                                                 }
 784
 785                                         if(score_23 <= score_01)
 786                                         {
 787                                                 // refinement was BAD
 788                                                 c[0] = c[2];
 789                                                 c[1] = c[3];
 790                                         }
 791                                         else if(refine == REFINE_LOOP)
 792                                                 refined = true;
 793
 794                                         // alpha refinement is always good and doesn't
 795                                         // need to be checked because alpha is linear
 796
 797                                         // when looping, though, checking the
 798                                         // alpha COULD help, but we usually
 799                                         // loop twice anyway as refinement
 800                                         // usually helps
 801                                 }
 802                         }
 803                 }
 804                 while(refine == REFINE_LOOP && refined);
 805
 806                 if(refine != REFINE_NEVER)
 807                 {
 808                         if(dxt == DXT5)
 809                         {
 810                                 if(ca[1] < ca[0])
 811                                 {
 812                                         ca[2] = ca[0];
 813                                         ca[0] = ca[1];
 814                                         ca[1] = ca[2];
 815                                         // swap the alphas
 816                                         for(int pindex = 0; pindex < 16; ++pindex)
 817                                         {
 818                                                 int bitindex_set = pindex * 3;
 819                                                 int bitindex_test = bitindex_set + 2;
 820                                                 if(!testbit(&out[2], bitindex_test))
 821                                                         xorbit(&out[2], bitindex_set);
 822                                         }
 823                                 }
 824                         }
 825                         if((dxt == DXT1) ? (c[1] < c[0]) : (c[0] < c[1]))
 826                         // DXT1: select mode with 3 = transparent
 827                         // other: don't select this mode
 828                         {
 829                                 c[2] = c[0];
 830                                 c[0] = c[1];
 831                                 c[1] = c[2];
 832                                 // swap the colors
 833                                 if(dxt == DXT1)
 834                                 {
 835                                         out[4] ^= 0x55 & ~(out[4] >> 1);
 836                                         out[5] ^= 0x55 & ~(out[5] >> 1);
 837                                         out[6] ^= 0x55 & ~(out[6] >> 1);
 838                                         out[7] ^= 0x55 & ~(out[7] >> 1);
 839                                 }
 840                                 else
 841                                 {
 842                                         out[12] ^= 0x55 & ~(out[12] >> 1);
 843                                         out[13] ^= 0x55 & ~(out[13] >> 1);
 844                                         out[14] ^= 0x55 & ~(out[14] >> 1);
 845                                         out[15] ^= 0x55 & ~(out[15] >> 1);
 846                                 }
 847                         }
 848                 }
 849
 850                 switch(dxt)
 851                 {
 852                         case DXT5:
 853                                 out[0] = ca[0];
 854                                 out[1] = ca[1];
 855                         case DXT3:
 856                                 out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
 857                                 out[9] = (c[0].r << 3) | (c[0].g >> 3);
 858                                 out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
 859                                 out[11] = (c[1].r << 3) | (c[1].g >> 3);
 860                                 break;
 861                         case DXT1:
 862                                 out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
 863                                 out[1] = (c[0].r << 3) | (c[0].g >> 3);
 864                                 out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
 865                                 out[3] = (c[1].r << 3) | (c[1].g >> 3);
 866                                 break;
 867                 }
 868         }
 869
 870         // these color dist functions do not need the refinement check, as they always improve the situation
 871         template<ColorDistFunc ColorDist> struct need_refine_check
 872         {
 873                 static const bool value = true;
 874         };
 875         template<> struct need_refine_check<color_dist_avg>
 876         {
 877                 static const bool value = false;
 878         };
 879         template<> struct need_refine_check<color_dist_wavg>
 880         {
 881                 static const bool value = false;
 882         };
 883
 884         // compile time dispatch magic
 885         template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
 886         inline s2tc_encode_block_func_t s2tc_encode_block_func(RefinementMode refine)
 887         {
 888                 switch(refine)
 889                 {
 890                         case REFINE_NEVER:
 891                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_NEVER>;
 892                         case REFINE_LOOP:
 893                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_LOOP>;
 894                         case REFINE_CHECK:
 895                                 if(need_refine_check<ColorDist>::value)
 896                                         return s2tc_encode_block<dxt, ColorDist, mode, REFINE_CHECK>;
 897                         default:
 898                         case REFINE_ALWAYS:
 899                                 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_ALWAYS>;
 900                 }
 901         }
 902
 903         // these color dist functions do not need the refinement check, as they always improve the situation
 904         template<ColorDistFunc ColorDist> struct supports_fast
 905         {
 906                 static const bool value = true;
 907         };
 908         template<> struct need_refine_check<color_dist_normalmap>
 909         {
 910                 static const bool value = false;
 911         };
 912
 913         template<DxtMode dxt, ColorDistFunc ColorDist>
 914         inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, RefinementMode refine)
 915         {
 916                 if(nrandom > 0)
 917                         return s2tc_encode_block_func<dxt, ColorDist, MODE_RANDOM>(refine);
 918                 else if(!supports_fast<ColorDist>::value || nrandom == 0) // MODE_FAST not supported for normalmaps, sorry
 919                         return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
 920                 else
 921                         return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
 922         }
 923
 924         template<ColorDistFunc ColorDist>
 925         inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, RefinementMode refine)
 926         {
 927                 switch(dxt)
 928                 {
 929                         case DXT1:
 930                                 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
 931                                 break;
 932                         case DXT3:
 933                                 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
 934                                 break;
 935                         default:
 936                         case DXT5:
 937                                 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
 938                                 break;
 939                 }
 940         }
 941 };
 942
 943 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, RefinementMode refine)
 944 {
 945         switch(cd)
 946         {
 947                 case RGB:
 948                         return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
 949                         break;
 950                 case YUV:
 951                         return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
 952                         break;
 953                 case SRGB:
 954                         return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
 955                         break;
 956                 case SRGB_MIXED:
 957                         return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
 958                         break;
 959                 case LAB:
 960                         return s2tc_encode_block_func<color_dist_lab_srgb>(dxt, nrandom, refine);
 961                         break;
 962                 case AVG:
 963                         return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
 964                         break;
 965                 default:
 966                 case WAVG:
 967                         return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
 968                         break;
 969                 case NORMALMAP:
 970                         return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
 971                         break;
 972         }
 973 }
 974
 975 namespace
 976 {
 977         inline int diffuse(int *diff, int src, int shift)
 978         {
 979                 int maxval = (1 << (8 - shift)) - 1;
 980                 src += *diff;
 981                 int ret = max(0, min(src >> shift, maxval));
 982                 // simulate decoding ("loop filter")
 983                 int loop = (ret << shift) | (ret >> (8 - 2 * shift));
 984                 *diff = src - loop;
 985                 return ret;
 986         }
 987         inline int diffuse1(int *diff, int src)
 988         {
 989                 src += *diff;
 990                 int ret = (src >= 128);
 991                 int loop = ret ? 255 : 0;
 992                 *diff = src - loop;
 993                 return ret;
 994         }
 995 };
 996
 997 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int bgr, int alphabits)
 998 {
 999         int x, y;
1000         int diffuse_r = 0;
1001         int diffuse_g = 0;
1002         int diffuse_b = 0;
1003         int diffuse_a = 0;
1004         if(bgr)
1005         {
1006                 for(y = 0; y < h; ++y)
1007                         for(x = 0; x < w; ++x)
1008                         {
1009                                 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 2], 3);
1010                                 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1011                                 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 0], 3);
1012                         }
1013         }
1014         else
1015         {
1016                 for(y = 0; y < h; ++y)
1017                         for(x = 0; x < w; ++x)
1018                         {
1019                                 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
1020                                 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1021                                 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
1022                         }
1023         }
1024         if(srccomps == 4)
1025         {
1026                 if(alphabits == 1)
1027                 {
1028                         for(y = 0; y < h; ++y)
1029                                 for(x = 0; x < w; ++x)
1030                                         out[(x + y * w) * 4 + 3] = diffuse1(&diffuse_a, rgba[(x + y * w) * srccomps + 3]);
1031                 }
1032                 else if(alphabits == 8)
1033                 {
1034                         for(y = 0; y < h; ++y)
1035                                 for(x = 0; x < w; ++x)
1036                                         out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1037                 }
1038                 else
1039                 {
1040                         int alphadiffuse = 8 - alphabits;
1041                         for(y = 0; y < h; ++y)
1042                                 for(x = 0; x < w; ++x)
1043                                         out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], alphadiffuse);
1044                 }
1045         }
1046         else
1047         {
1048                 int alpharange = (1 << alphabits) - 1;
1049                 for(y = 0; y < h; ++y)
1050                         for(x = 0; x < w; ++x)
1051                                 out[(x + y * w) * 4 + 3] = alpharange;
1052         }
1053 }
1054