2 * Copyright (C) 2011 Rudolf Polzer All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * RUDOLF POLZER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 #define S2TC_LICENSE_IDENTIFIER s2tc_algorithm_license
22 #include "s2tc_license.h"
32 #include "s2tc_algorithm.h"
33 #include "s2tc_common.h"
// Traits template keyed on a component type. The specializations visible in
// this file expose min_value / max_value; the primary template's body is not
// visible in this excerpt.
37 template<class T> struct color_type_info
// Specialization for 8-bit values: range is 0..255.
40 template<> struct color_type_info<unsigned char>
42 static const unsigned char min_value = 0;
43 static const unsigned char max_value = 255;
50 inline color_t make_color_t()
52 return (color_t) {0, 0, 0};
54 inline color_t make_color_t(signed char r_, signed char g_, signed char b_)
56 return (color_t) {r_, g_, b_};
58 inline color_t make_color_t(int i)
60 return (color_t) {(signed char)(i >> 3), (signed char)(i >> 2), (signed char)(i >> 3)};
62 inline bool operator==(const color_t &a, const color_t &b)
64 return a.r == b.r && a.g == b.g && a.b == b.b;
// Ordering and stepping operators for color_t. Only the declarations are
// visible in this excerpt; the bodies are not shown.
// Strict ordering of two colors (used below to decide endpoint order).
66 inline bool operator<(const color_t &a, const color_t &b)
// Pre-decrement: modifies c in place and returns it by reference.
78 inline color_t &operator--(color_t &c)
// Pre-increment: modifies c in place and returns it by reference.
103 inline color_t &operator++(color_t &c)
// Specialization for color_t: the extreme colors are declared in the struct
// and defined out of line right after it.
128 template<> struct color_type_info<color_t>
130 static const color_t min_value;
131 static const color_t max_value;
// All components zero.
133 const color_t color_type_info<color_t>::min_value = { 0, 0, 0 };
// 31 / 63 / 31 are the largest values of 5-, 6- and 5-bit fields.
134 const color_t color_type_info<color_t>::max_value = { 31, 63, 31 };
// Members of bigcolor_t (the struct header and most bodies are not visible in
// this excerpt). It is used below as the "Big" accumulator type paired with
// color_t.
// Default constructor: zero all three components.
140 inline bigcolor_t(): r(0), g(0), b(0)
// In-place add of a color_t.
144 inline bigcolor_t &operator+=(const color_t &c)
// In-place add of an integer.
152 inline bigcolor_t &operator+=(int v)
// Non-mutating add: starts from a copy of *this.
160 inline bigcolor_t operator+(int v)
162 bigcolor_t out = *this;
// In-place division by an integer.
167 inline bigcolor_t &operator/=(int v)
// Non-mutating division: starts from a copy of *this.
175 inline bigcolor_t operator/(int v)
177 bigcolor_t out = *this;
// In-place left shift.
182 inline bigcolor_t &operator<<=(int v)
// Non-mutating left shift: starts from a copy of *this.
190 inline bigcolor_t operator<<(int v)
192 bigcolor_t out = *this;
// Implicit conversion back to color_t.
197 inline operator color_t()
207 std::ostream &operator<<(std::ostream &ost, const color_t &c)
209 return ost << "make_color_t(" << int(c.r) << ", " << int(c.g) << ", " << int(c.b) << ")";
212 std::ostream &operator<<(std::ostream &ost, const bigcolor_t &c)
214 return ost << "bigcolor_t(" << c.r << ", " << c.g << ", " << c.b << ")";
// Range budget for the distance functions below:
217 // 16 differences must fit in int
218 // i.e. a difference must be lower than 2^27
220 // shift right, rounded
// SHRR(a, n) adds 2^(n-1) to a before shifting right by n, so the result is
// rounded instead of truncated. n must be at least 1, and it appears twice in
// the expansion, so it should be free of side effects.
221 #define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
223 inline int color_dist_avg(const color_t &a, const color_t &b)
225 int dr = a.r - b.r; // multiplier: 31 (-1..1)
226 int dg = a.g - b.g; // multiplier: 63 (-1..1)
227 int db = a.b - b.b; // multiplier: 31 (-1..1)
228 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
231 inline int color_dist_wavg(const color_t &a, const color_t &b)
233 int dr = a.r - b.r; // multiplier: 31 (-1..1)
234 int dg = a.g - b.g; // multiplier: 63 (-1..1)
235 int db = a.b - b.b; // multiplier: 31 (-1..1)
236 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
240 inline int color_dist_yuv(const color_t &a, const color_t &b)
242 int dr = a.r - b.r; // multiplier: 31 (-1..1)
243 int dg = a.g - b.g; // multiplier: 63 (-1..1)
244 int db = a.b - b.b; // multiplier: 31 (-1..1)
245 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
246 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
247 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
248 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
249 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
250 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
253 inline int color_dist_rgb(const color_t &a, const color_t &b)
255 int dr = a.r - b.r; // multiplier: 31 (-1..1)
256 int dg = a.g - b.g; // multiplier: 63 (-1..1)
257 int db = a.b - b.b; // multiplier: 31 (-1..1)
258 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
259 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
260 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
261 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
262 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
263 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
266 inline int color_dist_srgb(const color_t &a, const color_t &b)
268 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
269 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
270 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
271 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
272 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
273 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
274 int sy = SHRR(y, 3) * SHRR(y, 4);
275 int su = SHRR(u, 3) * SHRR(u, 4);
276 int sv = SHRR(v, 3) * SHRR(v, 4);
277 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
278 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
279 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
// Computes a luma-like value for one color from its squared components.
// Some lines of this function (including the return) are not visible in this
// excerpt.
282 inline int srgb_get_y(const color_t &a)
// Square each component.
285 int r = a.r * (int) a.r;
286 int g = a.g * (int) a.g;
287 int b = a.b * (int) a.b;
// Weighted sum with the same 21*4 / 72 / 7*4 weights as color_dist_srgb,
// scaled by 37.
289 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
290 // square root it (!)
// Adding 0.5f before the implicit float-to-int conversion rounds to nearest.
291 y = sqrtf(y) + 0.5f; // now in range 0 to 3815
// Distance that combines srgb_get_y() luma with chroma values derived from
// the plain r and b components. The definitions of y, u and v used in the
// return expression are on lines not visible in this excerpt.
295 inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
// Luma of each input.
298 int ay = srgb_get_y(a);
299 int by = srgb_get_y(b);
// Chroma of each input: scaled r (resp. b) minus that input's luma.
301 int au = a.r * 191 - ay;
302 int av = a.b * 191 - ay;
303 int bu = b.r * 191 - by;
304 int bv = b.b * 191 - by;
// Luma term is weighted by 8; chroma terms are halved / quartered with
// rounding.
309 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
// Distance for normal-map data: each color is turned into a 3-vector with
// components in -1..1 and the squared component differences are combined.
// The normalization steps and the scale factor are on lines not visible in
// this excerpt.
314 inline int color_dist_normalmap(const color_t &a, const color_t &b)
316 float ca[3], cb[3], n;
// Map 0..31 (r, b) and 0..63 (g) linearly onto -1..1.
317 ca[0] = a.r / 31.0f * 2 - 1;
318 ca[1] = a.g / 63.0f * 2 - 1;
319 ca[2] = a.b / 31.0f * 2 - 1;
320 cb[0] = b.r / 31.0f * 2 - 1;
321 cb[1] = b.g / 63.0f * 2 - 1;
322 cb[2] = b.b / 31.0f * 2 - 1;
// Squared length of ca; presumably used to normalize it -- TODO confirm.
323 n = ca[0] * ca[0] + ca[1] * ca[1] + ca[2] * ca[2];
// Squared length of cb, reusing n.
331 n = cb[0] * cb[0] + cb[1] * cb[1] + cb[2] * cb[2];
// Squared differences per component.
343 (cb[0] - ca[0]) * (cb[0] - ca[0])
345 (cb[1] - ca[1]) * (cb[1] - ca[1])
347 (cb[2] - ca[2]) * (cb[2] - ca[2])
// NOTE(review): the arithmetic in the comment below does not add up
// (1000 * 12 is 12000, not 6000); check it against the actual scale factor.
350 // max value: 1000 * (4 + 4 + 4) = 6000
// Function type shared by the color_dist_* functions; it is used as a
// non-type template parameter by the encoder templates below.
353 typedef int ColorDistFunc(const color_t &a, const color_t &b);
// Squared difference between two 8-bit alpha values.
//   a, b: the values to compare.
// Returns (a - b)^2, in the range 0..65025.
inline int alpha_dist(unsigned char a, unsigned char b)
{
	// Widen to int before subtracting so the difference can go negative.
	const int delta = a - (int) b;
	return delta * delta;
}
// Searches candidate pairs (i, j) taken from c[0..m) and scores each pair
// against the first n entries via a table of pairwise distances. Several
// lines (declarations, the accumulation of `sum`, and the final write-back)
// are not visible in this excerpt.
//   c:    candidate array
//   n:    number of entries that are scored against
//   dist: callable returning an int distance for two T values
360 template <class T, class F>
362 // m: total color count (including non-counted inputs)
364 inline void reduce_colors_inplace(T *c, int n, int m, F dist)
// Fill the symmetric part of the table for the first n entries; each pair is
// evaluated once and mirrored.
372 for(i = 0; i < n; ++i)
375 for(j = i+1; j < n; ++j)
377 int d = dist(c[i], c[j]);
378 dists[i][j] = dists[j][i] = d;
// Further rows: distance from c[i] to each of the first n entries (the
// enclosing loop over i is not visible here).
384 for(j = 0; j < n; ++j)
386 int d = dist(c[i], c[j]);
// Try every unordered pair of candidates.
390 for(i = 0; i < m; ++i)
391 for(j = i+1; j < m; ++j)
394 for(k = 0; k < n; ++k)
396 int di = dists[i][k];
397 int dj = dists[j][k];
// A negative bestsum means "no pair recorded yet"; otherwise keep the pair
// with the smaller sum.
401 if(bestsum < 0 || sum < bestsum)
// Variant of reduce_colors_inplace with two extra fixed values, fix0 and
// fix1, that take part in the per-entry minimum. Several lines are not
// visible in this excerpt.
//   c, n, m, dist: as for reduce_colors_inplace
//   fix0, fix1:    fixed values; their distances occupy table rows m and m+1
412 template <class T, class F>
413 inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
415 // TODO fix this for ramp encoding!
// Symmetric part of the table for the first n entries.
422 for(i = 0; i < n; ++i)
425 for(j = i+1; j < n; ++j)
427 int d = dist(c[i], c[j]);
428 dists[i][j] = dists[j][i] = d;
// Further rows: distance from c[i] to each of the first n entries (the
// enclosing loop over i is not visible here).
434 for(j = 0; j < n; ++j)
436 int d = dist(c[i], c[j]);
440 // then the two extra rows
441 for(j = 0; j < n; ++j)
443 int d = dist(fix0, c[j]);
446 for(j = 0; j < n; ++j)
448 int d = dist(fix1, c[j]);
// Try every unordered pair of candidates.
451 for(i = 0; i < m; ++i)
452 for(j = i+1; j < m; ++j)
455 for(k = 0; k < n; ++k)
457 int di = dists[i][k];
458 int dj = dists[j][k];
// Rows m and m+1 hold the fix0 / fix1 distances; `m` here is still the
// function parameter.
459 int d0 = dists[m][k];
460 int d1 = dists[m+1][k];
// NOTE(review): this local `m` (the smallest of the four distances) shadows
// the parameter `m` for the rest of the enclosing block.
461 int m = min(min(di, dj), min(d0, d1));
// A negative bestsum means "no pair recorded yet".
464 if(bestsum < 0 || sum < bestsum)
// Per-distance-function hook applied to a component value; the bodies are
// not visible in this excerpt. Explicit specializations exist for
// color_dist_srgb and color_dist_srgb_mixed.
483 template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
487 template<> inline int refine_component_encode<color_dist_srgb>(int comp)
491 template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
// Counterpart of refine_component_encode; the primary template's body is not
// visible in this excerpt.
496 template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
// For the two sRGB-based distances, decoding takes the square root; adding
// 0.5f before the float-to-int conversion rounds to nearest.
500 template<> inline int refine_component_decode<color_dist_srgb>(int comp)
502 return sqrtf(comp) + 0.5f;
504 template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
506 return sqrtf(comp) + 0.5f;
// Accumulator that receives (index, value) samples through add() and yields
// refined endpoint values through evaluate(). Member declarations and most
// bodies are not visible in this excerpt.
// Forward declaration of the general template.
509 template <class T, class Big, int scale_l>
510 struct s2tc_evaluate_colors_result_t;
// Partial specialization for scale_l == 1.
512 template <class T, class Big>
513 struct s2tc_evaluate_colors_result_t<T, Big, 1>
// Value-initialize the counters n0 / n1 and the accumulators S0 / S1.
522 inline s2tc_evaluate_colors_result_t():
523 n0(), n1(), S0(), S1()
// Record value a for index l.
526 inline void add(int l, T a)
// Write new endpoints into a and b; the bool result is tested by callers,
// which stop refining when it is false.
539 inline bool evaluate(T &a, T &b)
// (2*S + n) / (2*n) is S / n rounded to nearest; presumably S is the sum and
// n the count of samples for that endpoint -- TODO confirm against add().
544 a = ((S0 << 1) + n0) / (n0 << 1);
546 b = ((S1 << 1) + n1) / (n1 << 1);
// General template (any other scale_l).
551 template <class T, class Big, int scale_l>
552 struct s2tc_evaluate_colors_result_t
554 // a possible implementation of inferred color/alpha values
555 // refining would go here
// Stand-in result collector used by the refine_never encoders below; it
// offers the same add() entry point (body not visible in this excerpt).
559 struct s2tc_evaluate_colors_result_null_t
561 inline void add(int l, T a)
// Extracts a value of type T from a pixel buffer. The primary template's
// body is not visible in this excerpt.
566 template<class T> T get(const unsigned char *buf)
// unsigned char specialization: returns byte 3 of the pixel.
574 template<> unsigned char get<unsigned char>(const unsigned char *buf)
576 return buf[3]; // extract alpha
// Assigns every pixel of a w x h block to the nearest of n_input reference
// values (optionally also to the type's min / max value), writes the chosen
// index into `out`, reports each assignment to `res`, and tracks a score.
// Some parameters and several lines are not visible in this excerpt.
//   in:         4-byte pixels, iw pixels per row
//   w, h:       block size in pixels
//   colors_ref: reference values the pixels are matched against
579 template<class T, class Big, int bpp, bool have_trans, bool have_0_255, int n_input, class Dist, class Eval, class Arr>
580 inline unsigned int s2tc_try_encode_block(
584 const unsigned char *in, int iw, int w, int h,
585 const T colors_ref[])
587 unsigned int score = 0;
// Walk the block column by column.
588 for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
591 const unsigned char *pix = &in[(y * iw + x) * 4];
// Index with all bpp bits set (the guarding condition is not visible here).
597 out.do_or(i, (1 << bpp) - 1);
602 T color(get<T>(pix));
// Nearest-reference search, starting from reference 0.
604 int bestdist = ColorDist(color, colors_ref[0]);
605 for(int k = 1; k < n_input; ++k)
607 int dist = ColorDist(color, colors_ref[k]);
// Compare against the type's minimum value; ties go to the fixed value and
// the index written is (1 << bpp) - 2.
616 int dist_0 = ColorDist(color, color_type_info<T>::min_value);
617 if(dist_0 <= bestdist)
620 out.do_or(i, (1 << bpp) - 2);
// Same for the type's maximum value, index (1 << bpp) - 1.
624 int dist_255 = ColorDist(color, color_type_info<T>::max_value);
625 if(dist_255 <= bestdist)
628 out.do_or(i, (1 << bpp) - 1);
// Report the assignment so the collector can refine the endpoints.
635 res.add(best, color);
// Alpha block encoder with iterative refinement. Works on a scratch bit
// array and candidate endpoints (a0next / a1next). Loop structure and
// several lines are not visible in this excerpt.
//   out:    receives the sixteen 3-bit indices
//   a0, a1: alpha endpoints, updated in place
642 // REFINE_LOOP: refine, take result over only if score improved, loop until it did not
643 inline void s2tc_dxt5_encode_alpha_refine_loop(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
645 bitarray<uint64_t, 16, 3> out2;
646 unsigned char a0next = a0, a1next = a1;
// Best score so far, initialized to the largest positive 32-bit signed value.
647 unsigned int s = 0x7FFFFFFF;
650 unsigned char ramp[2] = {
654 s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
// Encode into the scratch array and collect refinement data in r2.
655 unsigned int s2 = s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out2, r2, alpha_dist, in, iw, w, h, ramp);
// evaluate() returning false ends the refinement.
662 if(!r2.evaluate(a0next, a1next))
// Post-processing passes over all 16 indices (case bodies not visible).
676 for(int i = 0; i < 16; ++i) switch(out.get(i))
687 for(int i = 0; i < 16; ++i) switch(out.get(i))
// Mirror an index within 0..7.
699 out.set(i, 7 - out.get(i));
// Alpha block encoder that encodes directly into `out` and collects
// refinement data in r2, without comparing scores. Several lines are not
// visible in this excerpt.
705 // REFINE_ALWAYS: refine, do not check
706 inline void s2tc_dxt5_encode_alpha_refine_always(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
708 unsigned char ramp[2] = {
712 s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
// Return value (the score) is deliberately unused here.
713 s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out, r2, alpha_dist, in, iw, w, h, ramp);
// Post-processing passes over all 16 indices (case bodies not visible).
722 for(int i = 0; i < 16; ++i) switch(out.get(i))
733 for(int i = 0; i < 16; ++i) switch(out.get(i))
// Mirror an index within 0..7.
745 out.set(i, 7 - out.get(i));
// Alpha block encoder without refinement: uses the null collector, so the
// per-pixel assignments are not accumulated. Several lines are not visible
// in this excerpt.
751 // REFINE_NEVER: do not refine
752 inline void s2tc_dxt5_encode_alpha_refine_never(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
// NOTE(review): ramp has six slots but n_input below is 2, so only the first
// two are read by the nearest-reference loop -- confirm the intent.
756 unsigned char ramp[6] = {
760 s2tc_evaluate_colors_result_null_t<unsigned char> r2;
761 s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out, r2, alpha_dist, in, iw, w, h, ramp);
// Color block encoder with iterative refinement. Works on a scratch bit
// array and candidate endpoints (c0next / c1next). Loop structure and
// several lines are not visible in this excerpt.
//   out:    receives the sixteen 2-bit indices
//   c0, c1: color endpoints, updated in place
764 // REFINE_LOOP: refine, take result over only if score improved, loop until it did not
765 template<ColorDistFunc ColorDist, bool have_trans>
766 inline void s2tc_dxt1_encode_color_refine_loop(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
768 bitarray<uint32_t, 16, 2> out2;
769 color_t c0next = c0, c1next = c1;
// Best score so far, initialized to the largest positive 32-bit signed value.
770 unsigned int s = 0x7FFFFFFF;
777 s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
// Encode into the scratch array and collect refinement data in r2.
778 unsigned int s2 = s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out2, r2, ColorDist, in, iw, w, h, ramp);
// evaluate() returning false ends the refinement.
785 if(!r2.evaluate(c0next, c1next))
// Special handling when c0 is already the maximum color (body not visible).
795 if(c0 == color_type_info<color_t>::max_value)
799 for(int i = 0; i < 16; ++i)
800 if(!(out.get(i) == 1))
// Endpoint order check; which order triggers the branch depends on
// have_trans.
804 if(have_trans ? c1 < c0 : c0 < c1)
// Touch only indices whose bit 1 is clear, i.e. indices 0 and 1.
807 for(int i = 0; i < 16; ++i)
808 if(!(out.get(i) & 2))
// Color block encoder that encodes directly into `out` and collects
// refinement data in r2, without comparing scores. Several lines are not
// visible in this excerpt.
813 // REFINE_ALWAYS: refine, do not check
814 template<ColorDistFunc ColorDist, bool have_trans>
815 inline void s2tc_dxt1_encode_color_refine_always(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
821 s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
// Return value (the score) is deliberately unused here.
822 s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
// Special handling when c0 is already the maximum color (body not visible).
827 if(c0 == color_type_info<color_t>::max_value)
831 for(int i = 0; i < 16; ++i)
832 if(!(out.get(i) == 1))
// Endpoint order check; which order triggers the branch depends on
// have_trans.
836 if(have_trans ? c1 < c0 : c0 < c1)
// Touch only indices whose bit 1 is clear, i.e. indices 0 and 1.
839 for(int i = 0; i < 16; ++i)
840 if(!(out.get(i) & 2))
// Color block encoder without refinement: checks the endpoint order first,
// then encodes with the null collector. Several lines are not visible in
// this excerpt.
845 // REFINE_NEVER: do not refine
846 template<ColorDistFunc ColorDist, bool have_trans>
847 inline void s2tc_dxt1_encode_color_refine_never(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
// Endpoint order check; which order triggers the branch depends on
// have_trans.
849 if(have_trans ? c1 < c0 : c0 < c1)
855 s2tc_evaluate_colors_result_null_t<color_t> r2;
856 s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
// Writes the alpha byte of every pixel in the w x h block into a 4-bit-per-
// entry bit array. The computation of index i is not visible in this
// excerpt.
859 inline void s2tc_dxt3_encode_alpha(bitarray<uint64_t, 16, 4> &out, const unsigned char *in, int iw, int w, int h)
861 for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
864 const unsigned char *pix = &in[(y * iw + x) * 4];
// pix[3] is OR-ed in as is; presumably the input alpha was already reduced
// to 4 bits upstream -- TODO confirm.
865 out.do_or(i, pix[3]);
// Encodes one block of up to 16 pixels into `out`. It picks two endpoint
// colors (and, for DXT5, two endpoint alphas), runs the encoder selected by
// `refine`, and packs endpoints plus index bits. Many lines, including the
// branch conditions on dxt / refine, are not visible in this excerpt.
//   out:     destination bytes for the encoded block
//   rgba:    4-byte source pixels, iw pixels per row
//   w, h:    block size in pixels
//   nrandom: number of extra random candidates; negative values add none
869 template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
870 inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
// NOTE(review): the array sizes depend on the runtime value nrandom, which
// makes these variable-length arrays (a compiler extension in C++).
872 color_t c[16 + (nrandom >= 0 ? nrandom : 0)];
873 unsigned char ca[16 + (nrandom >= 0 ? nrandom : 0)];
876 if(mode == MODE_FAST)
878 // FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these
880 color_t c0 = make_color_t(0, 0, 0);
882 // dummy values because we don't know whether the first pixel will write
889 int dmin = 0x7FFFFFFF;
// Scan the block; slot 2 of c / ca serves as scratch for the current pixel.
897 for(x = 0; x < w; ++x)
898 for(y = 0; y < h; ++y)
900 c[2].r = rgba[(x + y * iw) * 4 + 0];
901 c[2].g = rgba[(x + y * iw) * 4 + 1];
902 c[2].b = rgba[(x + y * iw) * 4 + 2];
903 ca[2] = rgba[(x + y * iw) * 4 + 3];
907 // MODE_FAST doesn't work for normalmaps, so this works
// Distance of the current pixel from c0 (black).
909 int d = ColorDist(c[2], c0);
// Other mode: collect the block's pixels into c[] / ca[].
937 for(x = 0; x < w; ++x)
938 for(y = 0; y < h; ++y)
940 c[n].r = rgba[(x + y * iw) * 4 + 0];
941 c[n].g = rgba[(x + y * iw) * 4 + 1];
942 c[n].b = rgba[(x + y * iw) * 4 + 2];
943 ca[n] = rgba[(x + y * iw) * 4 + 3];
// Per-channel bounds of the collected pixels; alpha bounds are tracked only
// for DXT5.
963 unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
964 unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
965 for(x = 1; x < n; ++x)
967 mins.r = min(mins.r, c[x].r);
968 mins.g = min(mins.g, c[x].g);
969 mins.b = min(mins.b, c[x].b);
970 maxs.r = max(maxs.r, c[x].r);
971 maxs.g = max(maxs.g, c[x].g);
972 maxs.b = max(maxs.b, c[x].b);
975 mina = min(mina, ca[x]);
976 maxa = max(maxa, ca[x]);
// Inclusive channel spans (+1), used as moduli for the random candidates.
979 color_t len = make_color_t(maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1);
// lena is 0 unless dxt == DXT5; the `% lena` below relies on a guard that is
// presumably on a line not visible here -- TODO confirm.
980 int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
// Append nrandom random candidates inside the bounding box.
981 for(x = 0; x < nrandom; ++x)
983 c[m].r = mins.r + rand() % len.r;
984 c[m].g = mins.g + rand() % len.g;
985 c[m].b = mins.b + rand() % len.b;
987 ca[m] = mina + rand() % lena;
993 // hack for last miplevel
// Reduce the candidates to the endpoint pair; alpha additionally treats 0
// and 255 as fixed values.
1001 reduce_colors_inplace(c, n, m, ColorDist);
1003 reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
1006 // equal colors are BAD
1009 if(c[0] == color_type_info<color_t>::max_value)
// Variant encoded with have_trans = true: 4 endpoint bytes followed by the
// index bits at out[4].
1030 bitarray<uint32_t, 16, 2> colorblock;
1034 s2tc_dxt1_encode_color_refine_never<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
1037 s2tc_dxt1_encode_color_refine_always<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
1040 s2tc_dxt1_encode_color_refine_loop<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
// Pack each endpoint as two bytes: low byte = low 3 bits of g and b,
// high byte = r and the high 3 bits of g.
1043 out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
1044 out[1] = (c[0].r << 3) | (c[0].g >> 3);
1045 out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
1046 out[3] = (c[1].r << 3) | (c[1].g >> 3);
1047 colorblock.tobytes(&out[4]);
// Variant with a 4-bit-per-pixel alpha block: alpha at out[0], color
// endpoints at out[8..11], color indices at out[12].
1052 bitarray<uint32_t, 16, 2> colorblock;
1053 bitarray<uint64_t, 16, 4> alphablock;
1057 s2tc_dxt1_encode_color_refine_never<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1060 s2tc_dxt1_encode_color_refine_always<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1063 s2tc_dxt1_encode_color_refine_loop<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1066 s2tc_dxt3_encode_alpha(alphablock, rgba, iw, w, h);
1067 alphablock.tobytes(&out[0]);
1068 out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
1069 out[9] = (c[0].r << 3) | (c[0].g >> 3);
1070 out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
1071 out[11] = (c[1].r << 3) | (c[1].g >> 3);
1072 colorblock.tobytes(&out[12]);
// Variant with a 3-bit-per-pixel alpha block: color and alpha use the same
// refinement mode; alpha indices go to out[2] (the writes to out[0..1] are
// not visible here).
1077 bitarray<uint32_t, 16, 2> colorblock;
1078 bitarray<uint64_t, 16, 3> alphablock;
1082 s2tc_dxt1_encode_color_refine_never<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1083 s2tc_dxt5_encode_alpha_refine_never(alphablock, rgba, iw, w, h, ca[0], ca[1]);
1086 s2tc_dxt1_encode_color_refine_always<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1087 s2tc_dxt5_encode_alpha_refine_always(alphablock, rgba, iw, w, h, ca[0], ca[1]);
1090 s2tc_dxt1_encode_color_refine_loop<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1091 s2tc_dxt5_encode_alpha_refine_loop(alphablock, rgba, iw, w, h, ca[0], ca[1]);
1096 alphablock.tobytes(&out[2]);
1097 out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
1098 out[9] = (c[0].r << 3) | (c[0].g >> 3);
1099 out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
1100 out[11] = (c[1].r << 3) | (c[1].g >> 3);
1101 colorblock.tobytes(&out[12]);
// Maps the runtime RefinementMode onto the matching s2tc_encode_block
// instantiation and returns it as a function pointer. The switch / case
// lines are not visible in this excerpt.
1107 // compile time dispatch magic
1108 template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
1109 inline s2tc_encode_block_func_t s2tc_encode_block_func(RefinementMode refine)
1114 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_NEVER>;
1116 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_LOOP>;
1119 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_ALWAYS>;
// NOTE(review): the comment below talks about refinement checks, but the
// trait that follows is named supports_fast and is only consulted when
// choosing between MODE_FAST and MODE_NORMAL -- the comment may be stale.
1123 // these color dist functions do not need the refinement check, as they always improve the situation
// Trait: true for every distance function by default.
1124 template<ColorDistFunc ColorDist> struct supports_fast
1126 static const bool value = true;
// color_dist_normalmap opts out.
1128 template<> struct supports_fast<color_dist_normalmap>
1130 static const bool value = false;
// Chooses the compression mode: MODE_NORMAL when the distance function does
// not support the fast path or when nrandom >= 0, MODE_FAST otherwise.
1133 template<DxtMode dxt, ColorDistFunc ColorDist>
1134 inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, RefinementMode refine)
1136 if(!supports_fast<ColorDist>::value || nrandom >= 0)
1137 return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
1139 return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
// Maps the runtime DxtMode onto the DXT1 / DXT3 / DXT5 instantiation. The
// switch / case lines are not visible in this excerpt.
1142 template<ColorDistFunc ColorDist>
1143 inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, RefinementMode refine)
1148 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
1151 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
1155 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
// Non-template entry point: maps the runtime ColorDistMode onto one of the
// color_dist_* functions and returns the block encoder for the requested
// format, random-candidate count and refinement mode. The switch / case
// lines are not visible in this excerpt.
1161 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, RefinementMode refine)
1166 return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
1169 return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
1172 return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
1175 return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
1178 return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
1182 return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
1185 return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
// Quantizes an 8-bit value to (8 - shift) bits. How the value behind `diff`
// is read and updated is on lines not visible in this excerpt.
//   diff:  carried state pointer
//   src:   source value
//   shift: number of low bits dropped
1192 inline int diffuse(int *diff, int src, int shift)
// Largest value representable in (8 - shift) bits.
1194 const int maxval = (1 << (8 - shift)) - 1;
// Drop the low bits and clamp into 0..maxval.
1196 int ret = max(0, min(src >> shift, maxval));
1197 // simulate decoding ("loop filter")
// Expand back to 8 bits by replicating the top bits into the low bits.
1198 int loop = (ret << shift) | (ret >> (8 - 2 * shift));
// One-bit variant of diffuse(): thresholds at 128. How the value behind
// `diff` is read and updated is on lines not visible in this excerpt.
1202 inline int diffuse1(int *diff, int src)
// 1 when src is 128 or more, 0 otherwise.
1205 int ret = (src >= 128);
1206 // simulate decoding ("loop filter")
// A set bit decodes to 255, a clear bit to 0.
1207 int loop = ret ? 255 : 0;
// Quantizes one sample to (8 - shift) bits and splits the quantization error
// into parts e7 / e3 / e5. The lines that read and write thisrow / downrow
// and that update `err` between the parts are not visible in this excerpt.
//   thisrow, downrow: error buffers at the current x for this and the next row
//   src:              8-bit source value
//   shift:            number of low bits dropped
1212 inline int floyd(int *thisrow, int *downrow, int src, int shift)
1214 const int maxval = (1 << (8 - shift)) - 1;
// Widen the 8-bit value to 12 bits by replicating its top nibble.
1215 src = (src << 4) | (src >> 4);
// Quantize the 12-bit value and clamp into 0..maxval.
1217 int ret = max(0, min(src >> (shift + 4), maxval));
1218 // simulate decoding ("loop filter")
// Decoded value on the 12-bit scale (0..4095).
1219 int loop = (ret * 4095 / maxval);
1220 int err = src - loop;
// 7/16 of the error, rounded.
1221 int e7 = (err * 7 + 8) / 16;
// 3/9 and then 5/6 of the error; presumably err has the previous part
// subtracted in between, which makes these 3/16 and 5/16 overall -- TODO
// confirm.
1223 int e3 = (err * 3 + 4) / 9;
1225 int e5 = (err * 5 + 3) / 6;
// One-bit variant of floyd(): thresholds the 12-bit value at 2048. The lines
// that read and write thisrow / downrow are not visible in this excerpt.
1235 inline int floyd1(int *thisrow, int *downrow, int src)
// Widen the 8-bit value to 12 bits by replicating its top nibble.
1237 src = (src << 4) | (src >> 4);
1239 int ret = (src >= 2048);
1240 // simulate decoding ("loop filter")
// A set bit decodes to 4095, a clear bit to 0.
1241 int loop = ret ? 4095 : 0;
1242 int err = src - loop;
// Error parts, same scheme as in floyd().
1243 int e7 = (err * 7 + 8) / 16;
1245 int e3 = (err * 3 + 4) / 9;
1247 int e5 = (err * 5 + 3) / 6;
// Converts a w x h image with srccomps bytes per pixel into 4-byte pixels
// whose channels are reduced to 5/6/5 bits plus an alpha of `alphabits`
// bits. Three code paths follow (plain shift, diffuse(), floyd()); the
// switch on `dither` and several branch conditions are on lines not visible
// in this excerpt.
//   out:  destination, 4 bytes per pixel
//   rgba: source, srccomps bytes per pixel
1257 template<int srccomps, int alphabits, DitherMode dither>
1258 inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h)
// Path 1: plain truncation of the color channels.
1265 for(y = 0; y < h; ++y)
1266 for(x = 0; x < w; ++x)
1268 out[(x + y * w) * 4 + 0] = rgba[(x + y * w) * srccomps + 0] >> 3;
1269 out[(x + y * w) * 4 + 1] = rgba[(x + y * w) * srccomps + 1] >> 2;
1270 out[(x + y * w) * 4 + 2] = rgba[(x + y * w) * srccomps + 2] >> 3;
// Alpha keeps only its top bit.
1276 for(y = 0; y < h; ++y)
1277 for(x = 0; x < w; ++x)
1278 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> 7;
1280 else if(alphabits == 8)
1282 for(y = 0; y < h; ++y)
1283 for(x = 0; x < w; ++x)
1284 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
// Other alpha depths: keep the top `alphabits` bits.
1288 for(y = 0; y < h; ++y)
1289 for(x = 0; x < w; ++x)
1290 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> (8 - alphabits);
// Alpha filled with the maximum value; presumably the branch for sources
// without an alpha channel -- TODO confirm.
1295 for(y = 0; y < h; ++y)
1296 for(x = 0; x < w; ++x)
1297 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
// Path 2: diffuse() with one carried state value per channel.
1308 for(y = 0; y < h; ++y)
1309 for(x = 0; x < w; ++x)
1311 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
1312 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1313 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
1319 for(y = 0; y < h; ++y)
1320 for(x = 0; x < w; ++x)
1321 out[(x + y * w) * 4 + 3] = diffuse1(&diffuse_a, rgba[(x + y * w) * srccomps + 3]);
1323 else if(alphabits == 8)
1325 for(y = 0; y < h; ++y)
1326 for(x = 0; x < w; ++x)
1327 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1331 for(y = 0; y < h; ++y)
1332 for(x = 0; x < w; ++x)
1333 out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1338 for(y = 0; y < h; ++y)
1339 for(x = 0; x < w; ++x)
1340 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
// Path 3: floyd() with per-row error buffers.
1344 case DITHER_FLOYDSTEINBERG:
// downrow holds the error rows (its declaration is not visible here).
1349 memset(downrow, 0, sizeof(downrow));
1350 int *thisrow_r, *thisrow_g, *thisrow_b, *thisrow_a;
1351 int *downrow_r, *downrow_g, *downrow_b, *downrow_a;
1352 for(y = 0; y < h; ++y)
// Two groups of three rows (r, g, b; pw ints each) swap roles on every scan
// line; the group becoming "next row" is cleared first.
1354 thisrow_r = downrow + ((y&1)?3:0) * pw;
1355 downrow_r = downrow + ((y&1)?0:3) * pw;
1356 memset(downrow_r, 0, sizeof(*downrow_r) * (3*pw));
1357 thisrow_g = thisrow_r + pw;
1358 thisrow_b = thisrow_g + pw;
1359 downrow_g = downrow_r + pw;
1360 downrow_b = downrow_g + pw;
1361 for(x = 0; x < w; ++x)
1363 out[(x + y * w) * 4 + 0] = floyd(&thisrow_r[x], &downrow_r[x], rgba[(x + y * w) * srccomps + 0], 3);
1364 out[(x + y * w) * 4 + 1] = floyd(&thisrow_g[x], &downrow_g[x], rgba[(x + y * w) * srccomps + 1], 2);
1365 out[(x + y * w) * 4 + 2] = floyd(&thisrow_b[x], &downrow_b[x], rgba[(x + y * w) * srccomps + 2], 3);
// One-bit alpha: a single pair of rows swaps roles on every scan line.
1372 for(y = 0; y < h; ++y)
1374 thisrow_a = downrow + (y&1) * pw;
1375 downrow_a = downrow + !(y&1) * pw;
1376 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1377 for(x = 0; x < w; ++x)
1378 out[(x + y * w) * 4 + 3] = floyd1(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3]);
1381 else if(alphabits == 8)
1383 for(y = 0; y < h; ++y)
1384 for(x = 0; x < w; ++x)
1385 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
// Other alpha depths: floyd() with shift 8 - alphabits.
1389 for(y = 0; y < h; ++y)
1391 thisrow_a = downrow + (y&1) * pw;
1392 downrow_a = downrow + !(y&1) * pw;
1393 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1394 for(x = 0; x < w; ++x)
1395 out[(x + y * w) * 4 + 3] = floyd(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1401 for(y = 0; y < h; ++y)
1402 for(x = 0; x < w; ++x)
1403 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
// Maps the runtime DitherMode onto the matching compile-time instantiation.
// The switch and some case labels are not visible in this excerpt.
1410 template<int srccomps, int alphabits>
1411 inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, DitherMode dither)
1416 rgb565_image<srccomps, alphabits, DITHER_NONE>(out, rgba, w, h);
1420 rgb565_image<srccomps, alphabits, DITHER_SIMPLE>(out, rgba, w, h);
1422 case DITHER_FLOYDSTEINBERG:
1423 rgb565_image<srccomps, alphabits, DITHER_FLOYDSTEINBERG>(out, rgba, w, h);
// Maps the runtime alphabits value onto an instantiation for 1, 4 or 8 alpha
// bits. The switch / case lines are not visible in this excerpt.
1428 template<int srccomps>
1429 inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int alphabits, DitherMode dither)
1434 rgb565_image<srccomps, 1>(out, rgba, w, h, dither);
1437 rgb565_image<srccomps, 4>(out, rgba, w, h, dither);
1441 rgb565_image<srccomps, 8>(out, rgba, w, h, dither);
// Non-template entry point: maps the runtime srccomps value onto the 3- or
// 4-component instantiation. The switch / case lines are not visible in this
// excerpt.
//   out:       destination, 4 bytes per pixel
//   rgba:      source pixels, srccomps bytes each
//   w, h:      image size in pixels
//   alphabits: alpha depth of the output
//   dither:    dithering mode
1447 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int alphabits, DitherMode dither)
1452 rgb565_image<3>(out, rgba, w, h, alphabits, dither);
1456 rgb565_image<4>(out, rgba, w, h, alphabits, dither);