5 #include "s2tc_compressor.h"
6 #include "s2tc_common.h"
// Less-than comparison for two color_t values.
// NOTE(review): only the signature is visible in this excerpt; the body (and
// therefore the exact ordering it defines) is elided — confirm against the full file.
16 inline bool operator<(const color_t &a, const color_t &b)
// Range budget for the distance functions below (presumably because up to 16
// per-pixel distances are summed into one int, and 16 * 2^27 = 2^31 — TODO confirm):
28 // 16 differences must fit in int
29 // i.e. a difference must be lower than 2^27
// SHRR(a, n): adds 1 << (n-1) (half of 2^n) to `a` and then shifts right by n,
// i.e. a rounded right shift. `n` is expanded twice and must be >= 1, otherwise
// the bias term shifts by a negative amount.
31 // shift right, rounded
32 #define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
34 inline int color_dist_avg(const color_t &a, const color_t &b)
36 int dr = a.r - b.r; // multiplier: 31 (-1..1)
37 int dg = a.g - b.g; // multiplier: 63 (-1..1)
38 int db = a.b - b.b; // multiplier: 31 (-1..1)
39 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
42 inline int color_dist_wavg(const color_t &a, const color_t &b)
44 int dr = a.r - b.r; // multiplier: 31 (-1..1)
45 int dg = a.g - b.g; // multiplier: 63 (-1..1)
46 int db = a.b - b.b; // multiplier: 31 (-1..1)
47 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
51 inline int color_dist_yuv(const color_t &a, const color_t &b)
53 int dr = a.r - b.r; // multiplier: 31 (-1..1)
54 int dg = a.g - b.g; // multiplier: 63 (-1..1)
55 int db = a.b - b.b; // multiplier: 31 (-1..1)
56 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
57 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
58 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
59 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
60 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
61 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
64 inline int color_dist_rgb(const color_t &a, const color_t &b)
66 int dr = a.r - b.r; // multiplier: 31 (-1..1)
67 int dg = a.g - b.g; // multiplier: 63 (-1..1)
68 int db = a.b - b.b; // multiplier: 31 (-1..1)
69 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
70 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
71 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
72 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
73 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
74 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
77 inline int color_dist_srgb(const color_t &a, const color_t &b)
79 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
80 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
81 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
82 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
83 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
84 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
85 int sy = SHRR(y, 3) * SHRR(y, 4);
86 int su = SHRR(u, 3) * SHRR(u, 4);
87 int sv = SHRR(v, 3) * SHRR(v, 4);
88 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
89 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
90 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
// Computes a luma-like value for a single color from its squared channels.
// NOTE(review): the return statement is elided in this excerpt; presumably the
// final value of y is returned — confirm against the full file.
93 inline int srgb_get_y(const color_t &a)
// square each channel; the (int) cast forces the product to be computed as int
96 int r = a.r * (int) a.r;
97 int g = a.g * (int) a.g;
98 int b = a.b * (int) a.b;
// weighted sum with coefficients 21/72/7; r and b carry an extra factor of 4,
// and the whole sum is scaled by 37
100 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
101 // square root it (!)
// sqrt's floating-point result is converted back to int by the assignment
102 y = sqrt(y); // now in range 0 to 3815
// Color distance that uses srgb_get_y() for the luma of each color and the raw
// r/b channels (scaled by 191) minus that luma for the two chroma terms.
// NOTE(review): the definitions of y, u and v used in the return statement are
// elided in this excerpt; presumably they are the a-minus-b differences of the
// luma and chroma values computed below — confirm against the full file.
106 inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
// per-color luma
109 int ay = srgb_get_y(a);
110 int by = srgb_get_y(b);
// per-color chroma: scaled red/blue channel minus luma
112 int au = a.r * 191 - ay;
113 int av = a.b * 191 - ay;
114 int bu = b.r * 191 - by;
115 int bv = b.b * 191 - by;
// luma term weighted by 8, u term halved, v term quartered (both with rounding)
120 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
125 // FIXME this is likely broken
// Color distance computed in floating point: channels are normalized to 0..1,
// raised to the power 2.4, converted to CIE XYZ and then to CIE Lab, and the
// squared Lab differences are summed.
// NOTE(review): the definitions of Xn, Yn and Zn are elided in this excerpt.
126 inline int color_dist_lab_srgb(const color_t &a, const color_t &b)
// normalize each channel by its maximum (31 for r/b, 63 for g) and apply the 2.4 exponent
129 float ar = powf(a.r / 31.0f, 2.4f);
130 float ag = powf(a.g / 63.0f, 2.4f);
131 float ab = powf(a.b / 31.0f, 2.4f);
132 float br = powf(b.r / 31.0f, 2.4f);
133 float bg = powf(b.g / 63.0f, 2.4f);
134 float bb = powf(b.b / 31.0f, 2.4f);
135 // convert to CIE XYZ
136 float aX = 0.4124f * ar + 0.3576f * ag + 0.1805f * ab;
137 float aY = 0.2126f * ar + 0.7152f * ag + 0.0722f * ab;
138 float aZ = 0.0193f * ar + 0.1192f * ag + 0.9505f * ab;
139 float bX = 0.4124f * br + 0.3576f * bg + 0.1805f * bb;
140 float bY = 0.2126f * br + 0.7152f * bg + 0.0722f * bb;
141 float bZ = 0.0193f * br + 0.1192f * bg + 0.9505f * bb;
142 // convert to CIE Lab
// only the cube-root form is used here; there is no separate branch for small ratios
146 float aL = 116 * cbrtf(aY / Yn) - 16;
147 float aA = 500 * (cbrtf(aX / Xn) - cbrtf(aY / Yn));
148 float aB = 200 * (cbrtf(aY / Yn) - cbrtf(aZ / Zn));
149 float bL = 116 * cbrtf(bY / Yn) - 16;
150 float bA = 500 * (cbrtf(bX / Xn) - cbrtf(bY / Yn));
151 float bB = 200 * (cbrtf(bY / Yn) - cbrtf(bZ / Zn));
// NOTE(review): the comment below mentions moving weight away from A and B,
// but the visible expression adds the three squared differences with equal
// weight. The float result is scaled by 1000 and converted to int on return.
152 // euclidean distance, but moving weight away from A and B
153 return 1000 * ((aL - bL) * (aL - bL) + (aA - bA) * (aA - bA) + (aB - bB) * (aB - bB));
// Color distance for normal maps: each channel is remapped to a component in
// -1..1 and the squared component differences are combined.
// NOTE(review): the declarations of ca/cb and the statement that combines the
// three squared terms (including any scale factor) are elided in this excerpt.
156 inline int color_dist_normalmap(const color_t &a, const color_t &b)
// channel / max * 2 - 1 maps 0..max onto -1..1 (max is 31 for r/b, 63 for g)
159 ca[0] = a.r / 31.0 * 2 - 1;
160 ca[1] = a.g / 63.0 * 2 - 1;
161 ca[2] = a.b / 31.0 * 2 - 1;
162 cb[0] = b.r / 31.0 * 2 - 1;
163 cb[1] = b.g / 63.0 * 2 - 1;
164 cb[2] = b.b / 31.0 * 2 - 1;
// squared difference per component; each is at most 4 since components span -1..1
169 (cb[0] - ca[0]) * (cb[0] - ca[0])
171 (cb[1] - ca[1]) * (cb[1] - ca[1])
173 (cb[2] - ca[2]) * (cb[2] - ca[2])
176 // max value: 500 * (4 + 4 + 4) = 6000
// Function type shared by all color_dist_* functions: takes two colors by
// const reference and returns an int distance. Used below as a non-type
// template parameter so the distance function is selected at compile time.
179 typedef int ColorDistFunc(const color_t &a, const color_t &b);
// Squared difference between two 8-bit alpha values.
//
// Both operands are widened to int before subtracting, so the difference is
// signed and the result ranges from 0 (equal) to 255*255 = 65025.
inline int alpha_dist(unsigned char a, unsigned char b)
{
	const int delta = (int) a - (int) b;
	return delta * delta;
}
// Searches all pairs among m candidate values for the pair with the smallest
// accumulated distance to the first n values of c.
// NOTE(review): the local declarations, the accumulation of `sum` and the code
// that stores the winning pair are elided in this excerpt; presumably the best
// pair ends up in c[0] and c[1], as the name suggests — confirm.
186 template <class T, class F>
// c: candidate array; n: number of leading entries that are counted in the sums
188 // m: total color count (including non-counted inputs)
// dist: callable taking two T values and returning an int distance
190 inline void reduce_colors_inplace(T *c, int n, int m, F dist)
// distances among the first n entries; each value is stored at [i][j] and
// [j][i], so dist is treated as symmetric
198 for(i = 0; i < n; ++i)
201 for(j = i+1; j < n; ++j)
203 int d = dist(c[i], c[j]);
204 dists[i][j] = dists[j][i] = d;
// distances from a further candidate c[i] to each of the first n entries
// (the loop that advances i for this part is elided here)
210 for(j = 0; j < n; ++j)
212 int d = dist(c[i], c[j]);
// try every pair (i, j) of the m candidates
216 for(i = 0; i < m; ++i)
217 for(j = i+1; j < m; ++j)
// look up the distance of counted entry k to each member of the pair
220 for(k = 0; k < n; ++k)
222 int di = dists[i][k];
223 int dj = dists[j][k];
// a negative bestsum means no pair has been accepted yet
227 if(bestsum < 0 || sum < bestsum)
// Variant of reduce_colors_inplace with two extra fixed values (fix0, fix1)
// that are always available: each counted entry is scored by its smallest
// distance to the candidate pair or to either fixed value.
// NOTE(review): the local declarations, the accumulation of `sum` and the code
// that stores the winning pair are elided in this excerpt.
239 template <class T, class F>
240 inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
// distances among the first n entries, stored symmetrically
248 for(i = 0; i < n; ++i)
251 for(j = i+1; j < n; ++j)
253 int d = dist(c[i], c[j]);
254 dists[i][j] = dists[j][i] = d;
// distances from a further candidate c[i] to each of the first n entries
// (the loop that advances i for this part is elided here)
260 for(j = 0; j < n; ++j)
262 int d = dist(c[i], c[j]);
266 // then the two extra rows
// distances from fix0 and fix1 to every counted entry; the search loop below
// reads them back from rows m and m+1 of dists
267 for(j = 0; j < n; ++j)
269 int d = dist(fix0, c[j]);
272 for(j = 0; j < n; ++j)
274 int d = dist(fix1, c[j]);
// try every pair (i, j) of the m candidates
277 for(i = 0; i < m; ++i)
278 for(j = i+1; j < m; ++j)
281 for(k = 0; k < n; ++k)
283 int di = dists[i][k];
284 int dj = dists[j][k];
285 int d0 = dists[m][k];
286 int d1 = dists[m+1][k];
// NOTE(review): this local `m` shadows the parameter `m`. The two lookups
// above still see the parameter because they precede this declaration, but
// the name is easy to misread; consider renaming the local.
287 int m = min(min(di, dj), min(d0, d1));
// a negative bestsum means no pair has been accepted yet
290 if(bestsum < 0 || sum < bestsum)
// Encodes one block of up to 4x4 pixels into `out`.
//   dxt       - output format; the block is 8 bytes for DXT1 and 16 otherwise
//   ColorDist - color distance function used for every color comparison
//   mode      - how the two endpoint colors are chosen (MODE_FAST, MODE_RANDOM, or the default path)
//   refine    - compile-time flag; the statements it guards are elided in this excerpt
//   out       - destination block
//   rgba      - source pixels, 4 bytes each, addressed as (x + y * iw) * 4
//   iw        - row stride of the source in pixels
//   w, h      - number of pixels of the block that are actually read
//   nrandom   - number of extra random candidate colors in MODE_RANDOM
// NOTE(review): many statements (declarations, branch conditions, braces) are
// elided in this excerpt; comments below describe only what the visible lines show.
310 template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, bool refine>
311 inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
// candidate colors; the size depends on the runtime value nrandom, so this is a
// variable-length array (a compiler extension in C++)
313 color_t c[16 + (mode == MODE_RANDOM ? nrandom : 0)];
// candidate alpha values
315 unsigned char ca[16];
// fast path: colors are compared against the reference color c0 = {0, 0, 0}
319 if(mode == MODE_FAST)
321 color_t c0 = {0, 0, 0};
327 int dmin = ColorDist(c[0], c0);
// y starts at 1 when x == 0, so pixel (0, 0) is skipped by this scan
// (presumably because it was already loaded into c[0] — confirm)
335 for(x = 0; x < w; ++x)
336 for(y = !x; y < h; ++y)
// source bytes 0, 1, 2 are read as b, g, r respectively; c[2] is scratch space
338 c[2].r = rgba[(x + y * iw) * 4 + 2];
339 c[2].g = rgba[(x + y * iw) * 4 + 1];
340 c[2].b = rgba[(x + y * iw) * 4 + 0];
342 int d = ColorDist(c[2], c0);
// source byte 3 is alpha
356 ca[2] = rgba[(x + y * iw) * 4 + 3];
// other modes: collect every pixel of the block as a candidate at index n
368 for(x = 0; x < w; ++x)
369 for(y = 0; y < h; ++y)
371 c[n].r = rgba[(x + y * iw) * 4 + 2];
372 c[n].g = rgba[(x + y * iw) * 4 + 1];
373 c[n].b = rgba[(x + y * iw) * 4 + 0];
375 ca[n] = rgba[(x + y * iw) * 4 + 3];
// random mode: compute the per-channel bounding box of the collected colors ...
380 if(mode == MODE_RANDOM)
384 for(x = 1; x < n; ++x)
386 mins.r = min(mins.r, c[x].r);
387 mins.g = min(mins.g, c[x].g);
388 mins.b = min(mins.b, c[x].b);
389 maxs.r = max(maxs.r, c[x].r);
390 maxs.g = max(maxs.g, c[x].g);
391 maxs.b = max(maxs.b, c[x].b);
// ... and add nrandom extra candidates drawn from inside that box;
// len is the inclusive extent per channel, so the modulo never divides by zero
393 color_t len = { maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1 };
394 for(x = 0; x < nrandom; ++x)
396 c[m].r = mins.r + rand() % len.r;
397 c[m].g = mins.g + rand() % len.g;
398 c[m].b = mins.b + rand() % len.b;
404 // hack for last miplevel
// reduce the candidates to the chosen color pair; for alpha, 0 and 255 are
// passed as the two always-available fixed values
412 reduce_colors_inplace(c, n, m, ColorDist);
414 reduce_colors_inplace_2fixpoints(ca, n, n, alpha_dist, (unsigned char) 0, (unsigned char) 255);
// per-endpoint pixel counts (n*) and channel sums (s*), used by the averaging further down
436 int nc0 = 0, na0 = 0, sc0r = 0, sc0g = 0, sc0b = 0, sa0 = 0;
437 int nc1 = 0, na1 = 0, sc1r = 0, sc1g = 0, sc1b = 0, sa1 = 0;
// start from an all-zero block so index bits can simply be OR-ed in
439 memset(out, 0, (dxt == DXT1) ? 8 : 16);
440 for(x = 0; x < w; ++x)
441 for(y = 0; y < h; ++y)
// position inside the output block, which is always 4 pixels wide
443 int pindex = (x+y*4);
// reload this pixel into the scratch slots c[2] / ca[2]
444 c[2].r = rgba[(x + y * iw) * 4 + 2];
445 c[2].g = rgba[(x + y * iw) * 4 + 1];
446 c[2].b = rgba[(x + y * iw) * 4 + 0];
447 ca[2] = rgba[(x + y * iw) * 4 + 3];
// 3 bits per pixel for the alpha index, stored from byte 2 of the block onwards
453 int bitindex = pindex * 3;
// distance of this pixel's alpha to the two endpoints and to the fixed values 0 and 255
454 da[0] = alpha_dist(ca[0], ca[2]);
455 da[1] = alpha_dist(ca[1], ca[2]);
456 da[2] = alpha_dist(0, ca[2]);
457 da[3] = alpha_dist(255, ca[2]);
// alpha 0 is closest (ties go to 0)
458 if(da[2] <= da[0] && da[2] <= da[1] && da[2] <= da[3])
462 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
464 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
// alpha 255 is closest
466 else if(da[3] <= da[0] && da[3] <= da[1])
469 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
471 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
473 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
// first alpha endpoint is at least as close as the second
475 else if(da[0] <= da[1])
487 out[bitindex / 8 + 2] |= (1 << (bitindex % 8));
// color index: set the bit when the pixel is strictly closer to c[1] than to c[0]
495 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
// 2 bits per pixel for the color index, stored from byte 12 onwards
497 int bitindex = pindex * 2;
498 out[bitindex / 8 + 12] |= (1 << (bitindex % 8));
// 4 bits per pixel of alpha stored directly from byte 0 onwards
520 int bitindex = pindex * 4;
521 out[bitindex / 8 + 0] |= (ca[2] << (bitindex % 8));
523 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
525 int bitindex = pindex * 2;
526 out[bitindex / 8 + 12] |= (1 << (bitindex % 8));
// 8-byte layout: color indices live from byte 4 onwards; index 3 is written by
// the first branch (its condition is elided here)
548 int bitindex = pindex * 2;
550 out[bitindex / 8 + 4] |= (3 << (bitindex % 8));
551 else if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
553 out[bitindex / 8 + 4] |= (1 << (bitindex % 8));
// replace each endpoint by the rounded mean of the values summed for it:
// (2*s + n) / (2*n) equals s/n rounded to nearest
// NOTE(review): these divide by the counts; the guards against a zero count
// are not visible in this excerpt — confirm they exist.
581 ca[0] = (2 * sa0 + na0) / (2 * na0);
583 ca[1] = (2 * sa1 + na1) / (2 * na1);
587 c[0].r = (2 * sc0r + nc0) / (2 * nc0);
588 c[0].g = (2 * sc0g + nc0) / (2 * nc0);
589 c[0].b = (2 * sc0b + nc0) / (2 * nc0);
593 c[1].r = (2 * sc1r + nc1) / (2 * nc1);
594 c[1].g = (2 * sc1g + nc1) / (2 * nc1);
595 c[1].b = (2 * sc1b + nc1) / (2 * nc1);
// walk the 16 three-bit alpha indices and toggle one bit of each depending on another bit
// NOTE(review): every other 3-bit alpha write in this function uses
// out[bitindex / 8 + 2], but these two lines index out[] with no byte offset,
// and the tested bit is 3 positions on (the next pixel's field). This looks
// inconsistent — verify the intended bit positions.
606 for(int pindex = 0; pindex < 16; ++pindex)
608 int bitindex_set = pindex * 3;
609 int bitindex_test = bitindex_set + 3;
610 if(!(out[bitindex_test / 8] & (1 << (bitindex_test % 8))))
611 out[bitindex_set / 8] ^= (1 << (bitindex_set % 8));
// for each 2-bit color index in bytes 4..7: flip the low bit when the high bit
// is clear, i.e. swap indices 0 and 1 and leave 2 and 3 untouched
623 out[4] ^= 0x55 & ~(out[4] >> 1);
624 out[5] ^= 0x55 & ~(out[5] >> 1);
625 out[6] ^= 0x55 & ~(out[6] >> 1);
626 out[7] ^= 0x55 & ~(out[7] >> 1);
// same swap for the 16-byte layouts, whose color indices sit in bytes 12..15
630 out[12] ^= 0x55 & ~(out[12] >> 1);
631 out[13] ^= 0x55 & ~(out[13] >> 1);
632 out[14] ^= 0x55 & ~(out[14] >> 1);
633 out[15] ^= 0x55 & ~(out[15] >> 1);
// pack both endpoint colors as 16-bit values, low byte first:
// low byte = low 3 bits of g (bits 5..7) | b, high byte = r << 3 | high bits of g
// 16-byte layouts keep the colors in bytes 8..11
643 out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
644 out[9] = (c[0].r << 3) | (c[0].g >> 3);
645 out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
646 out[11] = (c[1].r << 3) | (c[1].g >> 3);
// 8-byte layout keeps the colors in bytes 0..3
649 out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
650 out[1] = (c[0].r << 3) | (c[0].g >> 3);
651 out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
652 out[3] = (c[1].r << 3) | (c[1].g >> 3);
657 // compile time dispatch magic
// Innermost dispatch step: turns the runtime flag `refine` into the last
// template argument of s2tc_encode_block and returns that instantiation.
// NOTE(review): the condition choosing between the two returns is elided;
// presumably it tests `refine` — confirm.
658 template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
659 inline s2tc_encode_block_func_t s2tc_encode_block_func(bool refine)
662 return s2tc_encode_block<dxt, ColorDist, mode, true>;
664 return s2tc_encode_block<dxt, ColorDist, mode, false>;
// Dispatch step that turns the runtime value `nrandom` into a compile-time
// CompressionMode: nrandom == 0 selects MODE_NORMAL; the other two cases select
// MODE_RANDOM and MODE_FAST.
// NOTE(review): the first condition is elided; presumably a positive nrandom
// selects MODE_RANDOM and a negative one MODE_FAST — confirm.
667 template<DxtMode dxt, ColorDistFunc ColorDist>
668 inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, bool refine)
671 return s2tc_encode_block_func<dxt, ColorDist, MODE_RANDOM>(refine);
672 else if(nrandom == 0)
673 return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
675 return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
// Dispatch step that turns the runtime value `dxt` into a compile-time DxtMode
// (DXT1, DXT3 or DXT5) and forwards nrandom and refine to the next step.
// NOTE(review): the switch/case (or if) lines selecting each return are elided.
678 template<ColorDistFunc ColorDist>
679 inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, bool refine)
684 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
687 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
691 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
// Non-template entry point of the dispatch chain: selects the color distance
// function for `cd` and returns the matching block encoder for the requested
// dxt / nrandom / refine combination.
// NOTE(review): the case labels mapping each ColorDistMode value to a return
// are elided in this excerpt, so the exact mapping cannot be stated here.
697 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, bool refine)
702 return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
705 return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
708 return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
711 return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
714 return s2tc_encode_block_func<color_dist_lab_srgb>(dxt, nrandom, refine);
717 return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
721 return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
724 return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
// Reduces an 8-bit value `src` to (8 - shift) bits.
// NOTE(review): the statements that read and update *diff and the return are
// elided in this excerpt; presumably *diff carries the quantization error from
// one call to the next — confirm.
731 inline int diffuse(int *diff, int src, int shift)
// largest value representable in the reduced bit width
733 int maxval = (1 << (8 - shift)) - 1;
// drop the low `shift` bits and clamp to 0..maxval
735 int ret = max(0, min(src >> shift, maxval));
736 // simulate decoding ("loop filter")
// expand back to 8 bits by refilling the low bits from the top bits of ret
// NOTE(review): 8 - 2 * shift is negative when shift > 4, which would make the
// right shift undefined — check which shift values callers can pass.
737 int loop = (ret << shift) | (ret >> (8 - 2 * shift));
// Converts an image to reduced channel precision via diffuse(), always writing
// 4 bytes per output pixel.
//   out       - destination, addressed as (x + y * w) * 4 + channel
//   rgba      - source, addressed as (x + y * w) * srccomps + channel
//   w, h      - image size in pixels
//   srccomps  - number of bytes per source pixel
//   bgr       - NOTE(review): presumably selects between the two color loops below; the condition is elided
//   alphabits - number of bits kept for alpha
// NOTE(review): the declarations of the diffuse_* accumulators and all branch
// conditions are elided in this excerpt.
743 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int bgr, int alphabits)
// first variant: source channel order is kept (2 -> 2, 1 -> 1, 0 -> 0);
// shift 3 leaves 5 bits for channels 2 and 0, shift 2 leaves 6 bits for channel 1
752 for(y = 0; y < h; ++y)
753 for(x = 0; x < w; ++x)
755 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 2], 3);
756 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
757 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 0], 3);
// second variant: source channels 0 and 2 are swapped on the way out
762 for(y = 0; y < h; ++y)
763 for(x = 0; x < w; ++x)
765 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
766 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
767 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
// alpha taken from source channel 3 and reduced to alphabits bits
772 int alphadiffuse = 8 - alphabits;
773 for(y = 0; y < h; ++y)
774 for(x = 0; x < w; ++x)
775 out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], alphadiffuse);
// otherwise alpha is filled with the maximum value representable in alphabits bits
779 int alpharange = (1 << alphabits) - 1;
780 for(y = 0; y < h; ++y)
781 for(x = 0; x < w; ++x)
782 out[(x + y * w) * 4 + 3] = alpharange;