2 * Copyright (C) 2011 Rudolf Polzer All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * RUDOLF POLZER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 #define S2TC_LICENSE_IDENTIFIER s2tc_algorithm_license
22 #include "s2tc_license.h"
29 #include "s2tc_algorithm.h"
30 #include "s2tc_common.h"
40 inline bool operator<(const color_t &a, const color_t &b)
52 // 16 differences must fit in int
53 // i.e. a difference must be lower than 2^27
55 // shift right, rounded
56 #define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
58 inline int color_dist_avg(const color_t &a, const color_t &b)
60 int dr = a.r - b.r; // multiplier: 31 (-1..1)
61 int dg = a.g - b.g; // multiplier: 63 (-1..1)
62 int db = a.b - b.b; // multiplier: 31 (-1..1)
63 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
66 inline int color_dist_wavg(const color_t &a, const color_t &b)
68 int dr = a.r - b.r; // multiplier: 31 (-1..1)
69 int dg = a.g - b.g; // multiplier: 63 (-1..1)
70 int db = a.b - b.b; // multiplier: 31 (-1..1)
71 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
75 inline int color_dist_yuv(const color_t &a, const color_t &b)
77 int dr = a.r - b.r; // multiplier: 31 (-1..1)
78 int dg = a.g - b.g; // multiplier: 63 (-1..1)
79 int db = a.b - b.b; // multiplier: 31 (-1..1)
80 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
81 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
82 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
83 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
84 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
85 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
88 inline int color_dist_rgb(const color_t &a, const color_t &b)
90 int dr = a.r - b.r; // multiplier: 31 (-1..1)
91 int dg = a.g - b.g; // multiplier: 63 (-1..1)
92 int db = a.b - b.b; // multiplier: 31 (-1..1)
93 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
94 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
95 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
96 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
97 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
98 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
101 inline int color_dist_srgb(const color_t &a, const color_t &b)
103 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
104 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
105 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
106 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
107 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
108 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
109 int sy = SHRR(y, 3) * SHRR(y, 4);
110 int su = SHRR(u, 3) * SHRR(u, 4);
111 int sv = SHRR(v, 3) * SHRR(v, 4);
112 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
113 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
114 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
117 inline int srgb_get_y(const color_t &a)
120 int r = a.r * (int) a.r;
121 int g = a.g * (int) a.g;
122 int b = a.b * (int) a.b;
124 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
125 // square root it (!)
126 y = sqrtf(y) + 0.5f; // now in range 0 to 3815
130 inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
133 int ay = srgb_get_y(a);
134 int by = srgb_get_y(b);
136 int au = a.r * 191 - ay;
137 int av = a.b * 191 - ay;
138 int bu = b.r * 191 - by;
139 int bv = b.b * 191 - by;
144 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
149 inline int color_dist_normalmap(const color_t &a, const color_t &b)
151 float ca[3], cb[3], n;
152 ca[0] = a.r / 31.0f * 2 - 1;
153 ca[1] = a.g / 63.0f * 2 - 1;
154 ca[2] = a.b / 31.0f * 2 - 1;
155 cb[0] = b.r / 31.0f * 2 - 1;
156 cb[1] = b.g / 63.0f * 2 - 1;
157 cb[2] = b.b / 31.0f * 2 - 1;
158 n = ca[0] * ca[0] + ca[1] * ca[1] + ca[2] * ca[2];
166 n = cb[0] * cb[0] + cb[1] * cb[1] + cb[2] * cb[2];
178 (cb[0] - ca[0]) * (cb[0] - ca[0])
180 (cb[1] - ca[1]) * (cb[1] - ca[1])
182 (cb[2] - ca[2]) * (cb[2] - ca[2])
185 // max value: 1000 * (4 + 4 + 4) = 6000
188 typedef int ColorDistFunc(const color_t &a, const color_t &b);
// Squared difference of two 8-bit alpha values (0 .. 255*255).
inline int alpha_dist(unsigned char a, unsigned char b)
{
	// widen before subtracting so the difference can be negative
	const int delta = static_cast<int>(a) - static_cast<int>(b);
	return delta * delta;
}
195 template <class T, class F>
197 // m: total color count (including non-counted inputs)
199 inline void reduce_colors_inplace(T *c, int n, int m, F dist)
207 for(i = 0; i < n; ++i)
210 for(j = i+1; j < n; ++j)
212 int d = dist(c[i], c[j]);
213 dists[i][j] = dists[j][i] = d;
219 for(j = 0; j < n; ++j)
221 int d = dist(c[i], c[j]);
225 for(i = 0; i < m; ++i)
226 for(j = i+1; j < m; ++j)
229 for(k = 0; k < n; ++k)
231 int di = dists[i][k];
232 int dj = dists[j][k];
236 if(bestsum < 0 || sum < bestsum)
248 template <class T, class F>
249 inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
257 for(i = 0; i < n; ++i)
260 for(j = i+1; j < n; ++j)
262 int d = dist(c[i], c[j]);
263 dists[i][j] = dists[j][i] = d;
269 for(j = 0; j < n; ++j)
271 int d = dist(c[i], c[j]);
275 // then the two extra rows
276 for(j = 0; j < n; ++j)
278 int d = dist(fix0, c[j]);
281 for(j = 0; j < n; ++j)
283 int d = dist(fix1, c[j]);
286 for(i = 0; i < m; ++i)
287 for(j = i+1; j < m; ++j)
290 for(k = 0; k < n; ++k)
292 int di = dists[i][k];
293 int dj = dists[j][k];
294 int d0 = dists[m][k];
295 int d1 = dists[m+1][k];
296 int m = min(min(di, dj), min(d0, d1));
299 if(bestsum < 0 || sum < bestsum)
319 template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
323 template<> inline int refine_component_encode<color_dist_srgb>(int comp)
327 template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
332 template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
336 template<> inline int refine_component_decode<color_dist_srgb>(int comp)
338 return sqrtf(comp) + 0.5f;
340 template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
342 return sqrtf(comp) + 0.5f;
345 template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
346 inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
348 color_t c[16 + (mode == MODE_RANDOM ? nrandom : 0)];
349 unsigned char ca[16 + (mode == MODE_RANDOM ? nrandom : 0)];
353 if(mode == MODE_FAST)
355 // FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these
357 color_t c0 = {0, 0, 0};
359 // dummy values because we don't know whether the first pixel will write
366 int dmin = 0x7FFFFFFF;
374 for(x = 0; x < w; ++x)
375 for(y = 0; y < h; ++y)
377 c[2].r = rgba[(x + y * iw) * 4 + 2];
378 c[2].g = rgba[(x + y * iw) * 4 + 1];
379 c[2].b = rgba[(x + y * iw) * 4 + 0];
380 ca[2] = rgba[(x + y * iw) * 4 + 3];
381 // MODE_FAST doesn't work for normalmaps, so this works
385 int d = ColorDist(c[2], c0);
409 // if ALL pixels were transparent, this won't stop us
415 for(x = 0; x < w; ++x)
416 for(y = 0; y < h; ++y)
418 ca[n] = rgba[(x + y * iw) * 4 + 3];
419 c[n].r = rgba[(x + y * iw) * 4 + 2];
420 c[n].g = rgba[(x + y * iw) * 4 + 1];
421 c[n].b = rgba[(x + y * iw) * 4 + 0];
434 if(mode == MODE_RANDOM)
438 unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
439 unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
440 for(x = 1; x < n; ++x)
442 mins.r = min(mins.r, c[x].r);
443 mins.g = min(mins.g, c[x].g);
444 mins.b = min(mins.b, c[x].b);
445 maxs.r = max(maxs.r, c[x].r);
446 maxs.g = max(maxs.g, c[x].g);
447 maxs.b = max(maxs.b, c[x].b);
450 mina = min(mina, ca[x]);
451 maxa = max(maxa, ca[x]);
454 color_t len = { maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1 };
455 int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
456 for(x = 0; x < nrandom; ++x)
458 c[m].r = mins.r + rand() % len.r;
459 c[m].g = mins.g + rand() % len.g;
460 c[m].b = mins.b + rand() % len.b;
462 ca[m] = mina + rand() % lena;
468 // hack for last miplevel
476 reduce_colors_inplace(c, n, m, ColorDist);
478 reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
481 if(refine == REFINE_NEVER)
487 // select mode with 6 = 0, 7 = 255
493 if((dxt == DXT1) ? (c[1] < c[0]) : (c[0] < c[1]))
494 // DXT1: select mode with 3 = transparent
495 // other: don't select this mode
506 int nc0 = 0, na0 = 0, sc0r = 0, sc0g = 0, sc0b = 0, sa0 = 0;
507 int nc1 = 0, na1 = 0, sc1r = 0, sc1g = 0, sc1b = 0, sa1 = 0;
508 if(refine == REFINE_LOOP)
511 memset(out, 0, (dxt == DXT1) ? 8 : 16);
512 for(x = 0; x < w; ++x)
513 for(y = 0; y < h; ++y)
515 int pindex = (x+y*4);
516 c[2].r = rgba[(x + y * iw) * 4 + 2];
517 c[2].g = rgba[(x + y * iw) * 4 + 1];
518 c[2].b = rgba[(x + y * iw) * 4 + 0];
519 ca[2] = rgba[(x + y * iw) * 4 + 3];
525 int bitindex = pindex * 3;
526 da[0] = alpha_dist(ca[0], ca[2]);
527 da[1] = alpha_dist(ca[1], ca[2]);
528 da[2] = alpha_dist(0, ca[2]);
529 da[3] = alpha_dist(255, ca[2]);
530 if(da[2] <= da[0] && da[2] <= da[1] && da[2] <= da[3])
534 setbit(&out[2], bitindex);
536 setbit(&out[2], bitindex);
538 else if(da[3] <= da[0] && da[3] <= da[1])
541 setbit(&out[2], bitindex);
543 setbit(&out[2], bitindex);
545 setbit(&out[2], bitindex);
547 else if(da[0] <= da[1])
550 if(refine != REFINE_NEVER)
559 setbit(&out[2], bitindex);
560 if(refine != REFINE_NEVER)
566 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
568 int bitindex = pindex * 2;
569 setbit(&out[12], bitindex);
570 if(refine != REFINE_NEVER)
573 sc1r += refine_component_encode<ColorDist>(c[2].r);
574 sc1g += refine_component_encode<ColorDist>(c[2].g);
575 sc1b += refine_component_encode<ColorDist>(c[2].b);
580 if(refine != REFINE_NEVER)
583 sc0r += refine_component_encode<ColorDist>(c[2].r);
584 sc0g += refine_component_encode<ColorDist>(c[2].g);
585 sc0b += refine_component_encode<ColorDist>(c[2].b);
592 int bitindex = pindex * 4;
593 setbit(&out[0], bitindex, ca[2]);
595 if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
597 int bitindex = pindex * 2;
598 setbit(&out[12], bitindex);
599 if(refine != REFINE_NEVER)
602 sc1r += refine_component_encode<ColorDist>(c[2].r);
603 sc1g += refine_component_encode<ColorDist>(c[2].g);
604 sc1b += refine_component_encode<ColorDist>(c[2].b);
609 if(refine != REFINE_NEVER)
612 sc0r += refine_component_encode<ColorDist>(c[2].r);
613 sc0g += refine_component_encode<ColorDist>(c[2].g);
614 sc0b += refine_component_encode<ColorDist>(c[2].b);
620 // the normalmap-uses-alpha-0 hack cannot be used here
621 int bitindex = pindex * 2;
623 setbit(&out[4], bitindex, 3);
624 else if(ColorDist(c[0], c[2]) > ColorDist(c[1], c[2]))
626 setbit(&out[4], bitindex);
627 if(refine != REFINE_NEVER)
630 sc1r += refine_component_encode<ColorDist>(c[2].r);
631 sc1g += refine_component_encode<ColorDist>(c[2].g);
632 sc1b += refine_component_encode<ColorDist>(c[2].b);
637 if(refine != REFINE_NEVER)
640 sc0r += refine_component_encode<ColorDist>(c[2].r);
641 sc0g += refine_component_encode<ColorDist>(c[2].g);
642 sc0b += refine_component_encode<ColorDist>(c[2].b);
649 if(refine != REFINE_NEVER)
651 // REFINEMENT: trick from libtxc_dxtn: reassign the colors to an average of the colors encoded with that value
656 ca[0] = (2 * sa0 + na0) / (2 * na0);
658 ca[1] = (2 * sa1 + na1) / (2 * na1);
660 if(refine == REFINE_CHECK || refine == REFINE_LOOP)
667 c[0].r = refine_component_decode<ColorDist>((2 * sc0r + nc0) / (2 * nc0));
668 c[0].g = refine_component_decode<ColorDist>((2 * sc0g + nc0) / (2 * nc0));
669 c[0].b = refine_component_decode<ColorDist>((2 * sc0b + nc0) / (2 * nc0));
673 c[1].r = refine_component_decode<ColorDist>((2 * sc1r + nc1) / (2 * nc1));
674 c[1].g = refine_component_decode<ColorDist>((2 * sc1g + nc1) / (2 * nc1));
675 c[1].b = refine_component_decode<ColorDist>((2 * sc1b + nc1) / (2 * nc1));
678 if(refine == REFINE_CHECK || refine == REFINE_LOOP)
682 for(x = 0; x < w; ++x)
683 for(y = 0; y < h; ++y)
685 int pindex = (x+y*4);
686 c[4].r = rgba[(x + y * iw) * 4 + 2];
687 c[4].g = rgba[(x + y * iw) * 4 + 1];
688 c[4].b = rgba[(x + y * iw) * 4 + 0];
689 if(dxt == DXT1) // in DXT1, alpha 0 pixels are always skipped!
691 // check ORIGINAL alpha (DXT1 and DXT3 preserve it)
692 ca[4] = rgba[(x + y * iw) * 4 + 3];
693 if(!rgba[(x + y * iw) * 4 + 3])
696 int bitindex = pindex * 2;
697 if(refine == REFINE_CHECK)
699 if(testbit(&out[(dxt == DXT1 ? 4 : 12)], bitindex))
702 score_01 += ColorDist(c[1], c[4]);
703 score_23 += ColorDist(c[3], c[4]);
708 score_01 += ColorDist(c[0], c[4]);
709 score_23 += ColorDist(c[2], c[4]);
712 else if(refine == REFINE_LOOP)
714 if(testbit(&out[(dxt == DXT1 ? 4 : 12)], bitindex))
717 score_23 += ColorDist(c[3], c[4]);
722 score_23 += ColorDist(c[2], c[4]);
724 // we WILL run another loop iteration, if score_01 wins
725 score_01 += min(ColorDist(c[0], c[4]), ColorDist(c[1], c[4]));
729 if(score_23 <= score_01)
731 // refinement was BAD
735 else if(refine == REFINE_LOOP)
738 // alpha refinement is always good and doesn't
739 // need to be checked because alpha is linear
741 // when looping, though, checking the
742 // alpha COULD help, but we usually
743 // loop twice anyway as refinement
748 while(refine == REFINE_LOOP && refined);
750 if(refine != REFINE_NEVER)
760 for(int pindex = 0; pindex < 16; ++pindex)
762 int bitindex_set = pindex * 3;
763 int bitindex_test = bitindex_set + 2;
764 if(!testbit(&out[2], bitindex_test))
765 xorbit(&out[2], bitindex_set);
769 if((dxt == DXT1) ? (c[1] < c[0]) : (c[0] < c[1]))
770 // DXT1: select mode with 3 = transparent
771 // other: don't select this mode
779 out[4] ^= 0x55 & ~(out[4] >> 1);
780 out[5] ^= 0x55 & ~(out[5] >> 1);
781 out[6] ^= 0x55 & ~(out[6] >> 1);
782 out[7] ^= 0x55 & ~(out[7] >> 1);
786 out[12] ^= 0x55 & ~(out[12] >> 1);
787 out[13] ^= 0x55 & ~(out[13] >> 1);
788 out[14] ^= 0x55 & ~(out[14] >> 1);
789 out[15] ^= 0x55 & ~(out[15] >> 1);
800 out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
801 out[9] = (c[0].r << 3) | (c[0].g >> 3);
802 out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
803 out[11] = (c[1].r << 3) | (c[1].g >> 3);
806 out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
807 out[1] = (c[0].r << 3) | (c[0].g >> 3);
808 out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
809 out[3] = (c[1].r << 3) | (c[1].g >> 3);
814 // these color dist functions do not need the refinement check, as they always improve the situation
815 template<ColorDistFunc ColorDist> struct need_refine_check
817 static const bool value = true;
819 template<> struct need_refine_check<color_dist_avg>
821 static const bool value = false;
823 template<> struct need_refine_check<color_dist_wavg>
825 static const bool value = false;
828 // compile time dispatch magic
829 template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
830 inline s2tc_encode_block_func_t s2tc_encode_block_func(RefinementMode refine)
835 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_NEVER>;
837 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_LOOP>;
839 if(need_refine_check<ColorDist>::value)
840 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_CHECK>;
843 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_ALWAYS>;
847 // these color dist functions do not need the refinement check, as they always improve the situation
848 template<ColorDistFunc ColorDist> struct supports_fast
850 static const bool value = true;
852 template<> struct need_refine_check<color_dist_normalmap>
854 static const bool value = false;
857 template<DxtMode dxt, ColorDistFunc ColorDist>
858 inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, RefinementMode refine)
861 return s2tc_encode_block_func<dxt, ColorDist, MODE_RANDOM>(refine);
862 else if(!supports_fast<ColorDist>::value || nrandom == 0) // MODE_FAST not supported for normalmaps, sorry
863 return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
865 return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
868 template<ColorDistFunc ColorDist>
869 inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, RefinementMode refine)
874 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
877 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
881 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
887 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, RefinementMode refine)
892 return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
895 return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
898 return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
901 return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
904 return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
908 return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
911 return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
918 inline int diffuse(int *diff, int src, int shift)
920 const int maxval = (1 << (8 - shift)) - 1;
922 int ret = max(0, min(src >> shift, maxval));
923 // simulate decoding ("loop filter")
924 int loop = (ret << shift) | (ret >> (8 - 2 * shift));
928 inline int diffuse1(int *diff, int src)
931 int ret = (src >= 128);
932 // simulate decoding ("loop filter")
933 int loop = ret ? 255 : 0;
938 inline int floyd(int *thisrow, int *downrow, int src, int shift)
940 const int maxval = (1 << (8 - shift)) - 1;
941 src = (src << 4) | (src >> 4);
943 int ret = max(0, min(src >> (shift + 4), maxval));
944 // simulate decoding ("loop filter")
945 int loop = (ret * 4095 / maxval);
946 int err = src - loop;
947 int e7 = (err * 7 + 8) / 16;
949 int e3 = (err * 3 + 4) / 9;
951 int e5 = (err * 5 + 3) / 6;
961 inline int floyd1(int *thisrow, int *downrow, int src)
963 src = (src << 4) | (src >> 4);
965 int ret = (src >= 2048);
966 // simulate decoding ("loop filter")
967 int loop = ret ? 4095 : 0;
968 int err = src - loop;
969 int e7 = (err * 7 + 8) / 16;
971 int e3 = (err * 3 + 4) / 9;
973 int e5 = (err * 5 + 3) / 6;
983 template<int srccomps, int alphabits, DitherMode dither>
984 inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h)
991 for(y = 0; y < h; ++y)
992 for(x = 0; x < w; ++x)
994 out[(x + y * w) * 4 + 2] = rgba[(x + y * w) * srccomps + 0] >> 3;
995 out[(x + y * w) * 4 + 1] = rgba[(x + y * w) * srccomps + 1] >> 2;
996 out[(x + y * w) * 4 + 0] = rgba[(x + y * w) * srccomps + 2] >> 3;
1002 for(y = 0; y < h; ++y)
1003 for(x = 0; x < w; ++x)
1004 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> 7;
1006 else if(alphabits == 8)
1008 for(y = 0; y < h; ++y)
1009 for(x = 0; x < w; ++x)
1010 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1014 int alphadiffuse = 8 - alphabits;
1015 for(y = 0; y < h; ++y)
1016 for(x = 0; x < w; ++x)
1017 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> (8 - alphabits);
1022 for(y = 0; y < h; ++y)
1023 for(x = 0; x < w; ++x)
1024 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1035 for(y = 0; y < h; ++y)
1036 for(x = 0; x < w; ++x)
1038 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
1039 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1040 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
1046 for(y = 0; y < h; ++y)
1047 for(x = 0; x < w; ++x)
1048 out[(x + y * w) * 4 + 3] = diffuse1(&diffuse_a, rgba[(x + y * w) * srccomps + 3]);
1050 else if(alphabits == 8)
1052 for(y = 0; y < h; ++y)
1053 for(x = 0; x < w; ++x)
1054 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1058 for(y = 0; y < h; ++y)
1059 for(x = 0; x < w; ++x)
1060 out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1065 for(y = 0; y < h; ++y)
1066 for(x = 0; x < w; ++x)
1067 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1071 case DITHER_FLOYDSTEINBERG:
1076 memset(downrow, 0, sizeof(downrow));
1077 int *thisrow_r, *thisrow_g, *thisrow_b, *thisrow_a;
1078 int *downrow_r, *downrow_g, *downrow_b, *downrow_a;
1079 for(y = 0; y < h; ++y)
1081 thisrow_r = downrow + ((y&1)?3:0) * pw;
1082 downrow_r = downrow + ((y&1)?0:3) * pw;
1083 memset(downrow_r, 0, sizeof(*downrow_r) * (3*pw));
1084 thisrow_g = thisrow_r + pw;
1085 thisrow_b = thisrow_g + pw;
1086 downrow_g = downrow_r + pw;
1087 downrow_b = downrow_g + pw;
1088 for(x = 0; x < w; ++x)
1090 out[(x + y * w) * 4 + 2] = floyd(&thisrow_r[x], &downrow_r[x], rgba[(x + y * w) * srccomps + 0], 3);
1091 out[(x + y * w) * 4 + 1] = floyd(&thisrow_g[x], &downrow_g[x], rgba[(x + y * w) * srccomps + 1], 2);
1092 out[(x + y * w) * 4 + 0] = floyd(&thisrow_b[x], &downrow_b[x], rgba[(x + y * w) * srccomps + 2], 3);
1099 for(y = 0; y < h; ++y)
1101 thisrow_a = downrow + (y&1) * pw;
1102 downrow_a = downrow + !(y&1) * pw;
1103 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1104 for(x = 0; x < w; ++x)
1105 out[(x + y * w) * 4 + 3] = floyd1(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3]);
1108 else if(alphabits == 8)
1110 for(y = 0; y < h; ++y)
1111 for(x = 0; x < w; ++x)
1112 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1116 for(y = 0; y < h; ++y)
1118 thisrow_a = downrow + (y&1) * pw;
1119 downrow_a = downrow + !(y&1) * pw;
1120 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1121 for(x = 0; x < w; ++x)
1122 out[(x + y * w) * 4 + 3] = floyd(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1128 for(y = 0; y < h; ++y)
1129 for(x = 0; x < w; ++x)
1130 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1137 template<int srccomps, int alphabits>
1138 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, DitherMode dither)
1143 rgb565_image<srccomps, alphabits, DITHER_NONE>(out, rgba, w, h);
1147 rgb565_image<srccomps, alphabits, DITHER_SIMPLE>(out, rgba, w, h);
1149 case DITHER_FLOYDSTEINBERG:
1150 rgb565_image<srccomps, alphabits, DITHER_FLOYDSTEINBERG>(out, rgba, w, h);
1155 template<int srccomps>
1156 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int alphabits, DitherMode dither)
1161 rgb565_image<srccomps, 1>(out, rgba, w, h, dither);
1164 rgb565_image<srccomps, 4>(out, rgba, w, h, dither);
1168 rgb565_image<srccomps, 8>(out, rgba, w, h, dither);
1174 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int alphabits, DitherMode dither)
1179 rgb565_image<3>(out, rgba, w, h, alphabits, dither);
1182 rgb565_image<4>(out, rgba, w, h, alphabits, dither);