/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/*
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
  {BGR,RGB}{1,4,8,15,16} support dithering

  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
  YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24

  tested special converters (most are tested actually, but I did not write it down ...)

  untested special converters
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
  YV12/I420 -> YV12/I420
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
*/
61 #include "swscale_internal.h"
63 #include "libavutil/intreadwrite.h"
64 #include "libavutil/cpu.h"
65 #include "libavutil/avutil.h"
66 #include "libavutil/mathematics.h"
67 #include "libavutil/bswap.h"
68 #include "libavutil/pixdesc.h"
/* Fixed-point RGB->YUV conversion coefficients (15-bit precision).
 * The 219/255 factor maps luma into the limited 16..235 range and
 * 224/255 maps chroma into 16..240; the base weights (0.299/0.587/0.114
 * for Y, 0.5 peaks for U/V) are the BT.601 coefficients. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
/*
  Special versions: fast Y 1:1 scaling (no interpolation in y direction)

  TODO
  more intelligent misalignment avoidance for the horizontal scaler
  write special vertical cubic upscale version
  optimize C code (YV12 / minmax)
  add support for packed pixel YUV input & output
  add support for Y8 output
  optimize BGR24 & BGR32
  add BGR4 output support
  write special BGR->BGR scaler
*/
98 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
99 { 1, 3, 1, 3, 1, 3, 1, 3, },
100 { 2, 0, 2, 0, 2, 0, 2, 0, },
103 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
104 { 6, 2, 6, 2, 6, 2, 6, 2, },
105 { 0, 4, 0, 4, 0, 4, 0, 4, },
108 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
109 { 8, 4, 11, 7, 8, 4, 11, 7, },
110 { 2, 14, 1, 13, 2, 14, 1, 13, },
111 { 10, 6, 9, 5, 10, 6, 9, 5, },
112 { 0, 12, 3, 15, 0, 12, 3, 15, },
115 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
116 { 17, 9, 23, 15, 16, 8, 22, 14, },
117 { 5, 29, 3, 27, 4, 28, 2, 26, },
118 { 21, 13, 19, 11, 20, 12, 18, 10, },
119 { 0, 24, 6, 30, 1, 25, 7, 31, },
120 { 16, 8, 22, 14, 17, 9, 23, 15, },
121 { 4, 28, 2, 26, 5, 29, 3, 27, },
122 { 20, 12, 18, 10, 21, 13, 19, 11, },
123 { 1, 25, 7, 31, 0, 24, 6, 30, },
126 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
127 { 0, 55, 14, 68, 3, 58, 17, 72, },
128 { 37, 18, 50, 32, 40, 22, 54, 35, },
129 { 9, 64, 5, 59, 13, 67, 8, 63, },
130 { 46, 27, 41, 23, 49, 31, 44, 26, },
131 { 2, 57, 16, 71, 1, 56, 15, 70, },
132 { 39, 21, 52, 34, 38, 19, 51, 33, },
133 { 11, 66, 7, 62, 10, 65, 6, 60, },
134 { 48, 30, 43, 25, 47, 29, 42, 24, },
138 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
139 {117, 62, 158, 103, 113, 58, 155, 100, },
140 { 34, 199, 21, 186, 31, 196, 17, 182, },
141 {144, 89, 131, 76, 141, 86, 127, 72, },
142 { 0, 165, 41, 206, 10, 175, 52, 217, },
143 {110, 55, 151, 96, 120, 65, 162, 107, },
144 { 28, 193, 14, 179, 38, 203, 24, 189, },
145 {138, 83, 124, 69, 148, 93, 134, 79, },
146 { 7, 172, 48, 213, 3, 168, 45, 210, },
149 // tries to correct a gamma of 1.5
150 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
151 { 0, 143, 18, 200, 2, 156, 25, 215, },
152 { 78, 28, 125, 64, 89, 36, 138, 74, },
153 { 10, 180, 3, 161, 16, 195, 8, 175, },
154 {109, 51, 93, 38, 121, 60, 105, 47, },
155 { 1, 152, 23, 210, 0, 147, 20, 205, },
156 { 85, 33, 134, 71, 81, 30, 130, 67, },
157 { 14, 190, 6, 171, 12, 185, 5, 166, },
158 {117, 57, 101, 44, 113, 54, 97, 41, },
161 // tries to correct a gamma of 2.0
162 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
163 { 0, 124, 8, 193, 0, 140, 12, 213, },
164 { 55, 14, 104, 42, 66, 19, 119, 52, },
165 { 3, 168, 1, 145, 6, 187, 3, 162, },
166 { 86, 31, 70, 21, 99, 39, 82, 28, },
167 { 0, 134, 11, 206, 0, 129, 9, 200, },
168 { 62, 17, 114, 48, 58, 16, 109, 45, },
169 { 5, 181, 2, 157, 4, 175, 1, 151, },
170 { 95, 36, 78, 26, 90, 34, 74, 24, },
173 // tries to correct a gamma of 2.5
174 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
175 { 0, 107, 3, 187, 0, 125, 6, 212, },
176 { 39, 7, 86, 28, 49, 11, 102, 36, },
177 { 1, 158, 0, 131, 3, 180, 1, 151, },
178 { 68, 19, 52, 12, 81, 25, 64, 17, },
179 { 0, 119, 5, 203, 0, 113, 4, 195, },
180 { 45, 9, 96, 33, 42, 8, 91, 30, },
181 { 2, 172, 1, 144, 2, 165, 0, 137, },
182 { 77, 23, 60, 15, 72, 21, 56, 14, },
186 static av_always_inline void
187 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
188 int lumFilterSize, const int16_t *chrFilter,
189 const int32_t **chrUSrc, const int32_t **chrVSrc,
190 int chrFilterSize, const int32_t **alpSrc,
191 uint16_t *dest[4], int dstW, int chrDstW,
192 int big_endian, int output_bits)
194 //FIXME Optimize (just quickly written not optimized..)
196 uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
197 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
198 int shift = 15 + 16 - output_bits;
200 #define output_pixel(pos, val) \
202 if (output_bits == 16) { \
203 AV_WB16(pos, av_clip_uint16(val >> shift)); \
205 AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
208 if (output_bits == 16) { \
209 AV_WL16(pos, av_clip_uint16(val >> shift)); \
211 AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
214 for (i = 0; i < dstW; i++) {
215 int val = 1 << (30-output_bits);
218 for (j = 0; j < lumFilterSize; j++)
219 val += lumSrc[j][i] * lumFilter[j];
221 output_pixel(&yDest[i], val);
225 for (i = 0; i < chrDstW; i++) {
226 int u = 1 << (30-output_bits);
227 int v = 1 << (30-output_bits);
230 for (j = 0; j < chrFilterSize; j++) {
231 u += chrUSrc[j][i] * chrFilter[j];
232 v += chrVSrc[j][i] * chrFilter[j];
235 output_pixel(&uDest[i], u);
236 output_pixel(&vDest[i], v);
240 if (CONFIG_SWSCALE_ALPHA && aDest) {
241 for (i = 0; i < dstW; i++) {
242 int val = 1 << (30-output_bits);
245 for (j = 0; j < lumFilterSize; j++)
246 val += alpSrc[j][i] * lumFilter[j];
248 output_pixel(&aDest[i], val);
254 #define yuv2NBPS(bits, BE_LE, is_be) \
255 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
256 const int16_t **_lumSrc, int lumFilterSize, \
257 const int16_t *chrFilter, const int16_t **_chrUSrc, \
258 const int16_t **_chrVSrc, \
259 int chrFilterSize, const int16_t **_alpSrc, \
260 uint8_t *_dest[4], int dstW, int chrDstW) \
262 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
263 **chrUSrc = (const int32_t **) _chrUSrc, \
264 **chrVSrc = (const int32_t **) _chrVSrc, \
265 **alpSrc = (const int32_t **) _alpSrc; \
266 yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
267 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
268 alpSrc, (uint16_t **) _dest, \
269 dstW, chrDstW, is_be, bits); \
278 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
279 const int16_t **lumSrc, int lumFilterSize,
280 const int16_t *chrFilter, const int16_t **chrUSrc,
281 const int16_t **chrVSrc,
282 int chrFilterSize, const int16_t **alpSrc,
283 uint8_t *dest[4], int dstW, int chrDstW)
285 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
286 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
289 //FIXME Optimize (just quickly written not optimized..)
290 for (i=0; i<dstW; i++) {
293 for (j=0; j<lumFilterSize; j++)
294 val += lumSrc[j][i] * lumFilter[j];
296 yDest[i]= av_clip_uint8(val>>19);
300 for (i=0; i<chrDstW; i++) {
304 for (j=0; j<chrFilterSize; j++) {
305 u += chrUSrc[j][i] * chrFilter[j];
306 v += chrVSrc[j][i] * chrFilter[j];
309 uDest[i]= av_clip_uint8(u>>19);
310 vDest[i]= av_clip_uint8(v>>19);
313 if (CONFIG_SWSCALE_ALPHA && aDest)
314 for (i=0; i<dstW; i++) {
317 for (j=0; j<lumFilterSize; j++)
318 val += alpSrc[j][i] * lumFilter[j];
320 aDest[i]= av_clip_uint8(val>>19);
324 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
325 const int16_t *chrUSrc, const int16_t *chrVSrc,
326 const int16_t *alpSrc,
327 uint8_t *dest[4], int dstW, int chrDstW)
329 uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
330 *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
333 for (i=0; i<dstW; i++) {
334 int val= (lumSrc[i]+64)>>7;
335 yDest[i]= av_clip_uint8(val);
339 for (i=0; i<chrDstW; i++) {
340 int u=(chrUSrc[i]+64)>>7;
341 int v=(chrVSrc[i]+64)>>7;
342 uDest[i]= av_clip_uint8(u);
343 vDest[i]= av_clip_uint8(v);
346 if (CONFIG_SWSCALE_ALPHA && aDest)
347 for (i=0; i<dstW; i++) {
348 int val= (alpSrc[i]+64)>>7;
349 aDest[i]= av_clip_uint8(val);
353 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
354 const int16_t **lumSrc, int lumFilterSize,
355 const int16_t *chrFilter, const int16_t **chrUSrc,
356 const int16_t **chrVSrc, int chrFilterSize,
357 const int16_t **alpSrc, uint8_t *dest[4],
358 int dstW, int chrDstW)
360 uint8_t *yDest = dest[0], *uDest = dest[1];
361 enum PixelFormat dstFormat = c->dstFormat;
363 //FIXME Optimize (just quickly written not optimized..)
365 for (i=0; i<dstW; i++) {
368 for (j=0; j<lumFilterSize; j++)
369 val += lumSrc[j][i] * lumFilter[j];
371 yDest[i]= av_clip_uint8(val>>19);
377 if (dstFormat == PIX_FMT_NV12)
378 for (i=0; i<chrDstW; i++) {
382 for (j=0; j<chrFilterSize; j++) {
383 u += chrUSrc[j][i] * chrFilter[j];
384 v += chrVSrc[j][i] * chrFilter[j];
387 uDest[2*i]= av_clip_uint8(u>>19);
388 uDest[2*i+1]= av_clip_uint8(v>>19);
391 for (i=0; i<chrDstW; i++) {
395 for (j=0; j<chrFilterSize; j++) {
396 u += chrUSrc[j][i] * chrFilter[j];
397 v += chrVSrc[j][i] * chrFilter[j];
400 uDest[2*i]= av_clip_uint8(v>>19);
401 uDest[2*i+1]= av_clip_uint8(u>>19);
405 #define output_pixel(pos, val) \
406 if (target == PIX_FMT_GRAY16BE) { \
412 static av_always_inline void
413 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
414 const int32_t **lumSrc, int lumFilterSize,
415 const int16_t *chrFilter, const int32_t **chrUSrc,
416 const int32_t **chrVSrc, int chrFilterSize,
417 const int32_t **alpSrc, uint16_t *dest, int dstW,
418 int y, enum PixelFormat target)
422 for (i = 0; i < (dstW >> 1); i++) {
427 for (j = 0; j < lumFilterSize; j++) {
428 Y1 += lumSrc[j][i * 2] * lumFilter[j];
429 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
433 if ((Y1 | Y2) & 0x10000) {
434 Y1 = av_clip_uint16(Y1);
435 Y2 = av_clip_uint16(Y2);
437 output_pixel(&dest[i * 2 + 0], Y1);
438 output_pixel(&dest[i * 2 + 1], Y2);
442 static av_always_inline void
443 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
444 const int32_t *ubuf[2], const int32_t *vbuf[2],
445 const int32_t *abuf[2], uint16_t *dest, int dstW,
446 int yalpha, int uvalpha, int y,
447 enum PixelFormat target)
449 int yalpha1 = 4095 - yalpha;
451 const int32_t *buf0 = buf[0], *buf1 = buf[1];
453 for (i = 0; i < (dstW >> 1); i++) {
454 int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
455 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
457 output_pixel(&dest[i * 2 + 0], Y1);
458 output_pixel(&dest[i * 2 + 1], Y2);
462 static av_always_inline void
463 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
464 const int32_t *ubuf[2], const int32_t *vbuf[2],
465 const int32_t *abuf0, uint16_t *dest, int dstW,
466 int uvalpha, int y, enum PixelFormat target)
470 for (i = 0; i < (dstW >> 1); i++) {
471 int Y1 = buf0[i * 2 ] << 1;
472 int Y2 = buf0[i * 2 + 1] << 1;
474 output_pixel(&dest[i * 2 + 0], Y1);
475 output_pixel(&dest[i * 2 + 1], Y2);
481 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
482 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
483 const int16_t **_lumSrc, int lumFilterSize, \
484 const int16_t *chrFilter, const int16_t **_chrUSrc, \
485 const int16_t **_chrVSrc, int chrFilterSize, \
486 const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
489 const int32_t **lumSrc = (const int32_t **) _lumSrc, \
490 **chrUSrc = (const int32_t **) _chrUSrc, \
491 **chrVSrc = (const int32_t **) _chrVSrc, \
492 **alpSrc = (const int32_t **) _alpSrc; \
493 uint16_t *dest = (uint16_t *) _dest; \
494 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
495 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
496 alpSrc, dest, dstW, y, fmt); \
499 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
500 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
501 const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
502 int yalpha, int uvalpha, int y) \
504 const int32_t **buf = (const int32_t **) _buf, \
505 **ubuf = (const int32_t **) _ubuf, \
506 **vbuf = (const int32_t **) _vbuf, \
507 **abuf = (const int32_t **) _abuf; \
508 uint16_t *dest = (uint16_t *) _dest; \
509 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
510 dest, dstW, yalpha, uvalpha, y, fmt); \
513 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
514 const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
515 const int16_t *_abuf0, uint8_t *_dest, int dstW, \
516 int uvalpha, int y) \
518 const int32_t *buf0 = (const int32_t *) _buf0, \
519 **ubuf = (const int32_t **) _ubuf, \
520 **vbuf = (const int32_t **) _vbuf, \
521 *abuf0 = (const int32_t *) _abuf0; \
522 uint16_t *dest = (uint16_t *) _dest; \
523 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
524 dstW, uvalpha, y, fmt); \
527 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
528 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
530 #define output_pixel(pos, acc) \
531 if (target == PIX_FMT_MONOBLACK) { \
537 static av_always_inline void
538 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
539 const int16_t **lumSrc, int lumFilterSize,
540 const int16_t *chrFilter, const int16_t **chrUSrc,
541 const int16_t **chrVSrc, int chrFilterSize,
542 const int16_t **alpSrc, uint8_t *dest, int dstW,
543 int y, enum PixelFormat target)
545 const uint8_t * const d128=dither_8x8_220[y&7];
546 uint8_t *g = c->table_gU[128] + c->table_gV[128];
550 for (i = 0; i < dstW - 1; i += 2) {
555 for (j = 0; j < lumFilterSize; j++) {
556 Y1 += lumSrc[j][i] * lumFilter[j];
557 Y2 += lumSrc[j][i+1] * lumFilter[j];
561 if ((Y1 | Y2) & 0x100) {
562 Y1 = av_clip_uint8(Y1);
563 Y2 = av_clip_uint8(Y2);
565 acc += acc + g[Y1 + d128[(i + 0) & 7]];
566 acc += acc + g[Y2 + d128[(i + 1) & 7]];
568 output_pixel(*dest++, acc);
573 static av_always_inline void
574 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
575 const int16_t *ubuf[2], const int16_t *vbuf[2],
576 const int16_t *abuf[2], uint8_t *dest, int dstW,
577 int yalpha, int uvalpha, int y,
578 enum PixelFormat target)
580 const int16_t *buf0 = buf[0], *buf1 = buf[1];
581 const uint8_t * const d128 = dither_8x8_220[y & 7];
582 uint8_t *g = c->table_gU[128] + c->table_gV[128];
583 int yalpha1 = 4095 - yalpha;
586 for (i = 0; i < dstW - 7; i += 8) {
587 int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
588 acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
589 acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
590 acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
591 acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
592 acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
593 acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
594 acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
595 output_pixel(*dest++, acc);
599 static av_always_inline void
600 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
601 const int16_t *ubuf[2], const int16_t *vbuf[2],
602 const int16_t *abuf0, uint8_t *dest, int dstW,
603 int uvalpha, int y, enum PixelFormat target)
605 const uint8_t * const d128 = dither_8x8_220[y & 7];
606 uint8_t *g = c->table_gU[128] + c->table_gV[128];
609 for (i = 0; i < dstW - 7; i += 8) {
610 int acc = g[(buf0[i ] >> 7) + d128[0]];
611 acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
612 acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
613 acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
614 acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
615 acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
616 acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
617 acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
618 output_pixel(*dest++, acc);
624 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
625 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
626 const int16_t **lumSrc, int lumFilterSize, \
627 const int16_t *chrFilter, const int16_t **chrUSrc, \
628 const int16_t **chrVSrc, int chrFilterSize, \
629 const int16_t **alpSrc, uint8_t *dest, int dstW, \
632 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
633 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
634 alpSrc, dest, dstW, y, fmt); \
637 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
638 const int16_t *ubuf[2], const int16_t *vbuf[2], \
639 const int16_t *abuf[2], uint8_t *dest, int dstW, \
640 int yalpha, int uvalpha, int y) \
642 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
643 dest, dstW, yalpha, uvalpha, y, fmt); \
646 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
647 const int16_t *ubuf[2], const int16_t *vbuf[2], \
648 const int16_t *abuf0, uint8_t *dest, int dstW, \
649 int uvalpha, int y) \
651 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
652 abuf0, dest, dstW, uvalpha, \
656 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
657 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
659 #define output_pixels(pos, Y1, U, Y2, V) \
660 if (target == PIX_FMT_YUYV422) { \
661 dest[pos + 0] = Y1; \
663 dest[pos + 2] = Y2; \
667 dest[pos + 1] = Y1; \
669 dest[pos + 3] = Y2; \
672 static av_always_inline void
673 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
674 const int16_t **lumSrc, int lumFilterSize,
675 const int16_t *chrFilter, const int16_t **chrUSrc,
676 const int16_t **chrVSrc, int chrFilterSize,
677 const int16_t **alpSrc, uint8_t *dest, int dstW,
678 int y, enum PixelFormat target)
682 for (i = 0; i < (dstW >> 1); i++) {
689 for (j = 0; j < lumFilterSize; j++) {
690 Y1 += lumSrc[j][i * 2] * lumFilter[j];
691 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
693 for (j = 0; j < chrFilterSize; j++) {
694 U += chrUSrc[j][i] * chrFilter[j];
695 V += chrVSrc[j][i] * chrFilter[j];
701 if ((Y1 | Y2 | U | V) & 0x100) {
702 Y1 = av_clip_uint8(Y1);
703 Y2 = av_clip_uint8(Y2);
704 U = av_clip_uint8(U);
705 V = av_clip_uint8(V);
707 output_pixels(4*i, Y1, U, Y2, V);
711 static av_always_inline void
712 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
713 const int16_t *ubuf[2], const int16_t *vbuf[2],
714 const int16_t *abuf[2], uint8_t *dest, int dstW,
715 int yalpha, int uvalpha, int y,
716 enum PixelFormat target)
718 const int16_t *buf0 = buf[0], *buf1 = buf[1],
719 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
720 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
721 int yalpha1 = 4095 - yalpha;
722 int uvalpha1 = 4095 - uvalpha;
725 for (i = 0; i < (dstW >> 1); i++) {
726 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
727 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
728 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
729 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
731 output_pixels(i * 4, Y1, U, Y2, V);
735 static av_always_inline void
736 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
737 const int16_t *ubuf[2], const int16_t *vbuf[2],
738 const int16_t *abuf0, uint8_t *dest, int dstW,
739 int uvalpha, int y, enum PixelFormat target)
741 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
742 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
745 if (uvalpha < 2048) {
746 for (i = 0; i < (dstW >> 1); i++) {
747 int Y1 = buf0[i * 2] >> 7;
748 int Y2 = buf0[i * 2 + 1] >> 7;
749 int U = ubuf1[i] >> 7;
750 int V = vbuf1[i] >> 7;
752 output_pixels(i * 4, Y1, U, Y2, V);
755 for (i = 0; i < (dstW >> 1); i++) {
756 int Y1 = buf0[i * 2] >> 7;
757 int Y2 = buf0[i * 2 + 1] >> 7;
758 int U = (ubuf0[i] + ubuf1[i]) >> 8;
759 int V = (vbuf0[i] + vbuf1[i]) >> 8;
761 output_pixels(i * 4, Y1, U, Y2, V);
768 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
769 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
771 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
772 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
773 #define output_pixel(pos, val) \
774 if (isBE(target)) { \
780 static av_always_inline void
781 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
782 const int32_t **lumSrc, int lumFilterSize,
783 const int16_t *chrFilter, const int32_t **chrUSrc,
784 const int32_t **chrVSrc, int chrFilterSize,
785 const int32_t **alpSrc, uint16_t *dest, int dstW,
786 int y, enum PixelFormat target)
790 for (i = 0; i < (dstW >> 1); i++) {
794 int U = -128 << 23; // 19
798 for (j = 0; j < lumFilterSize; j++) {
799 Y1 += lumSrc[j][i * 2] * lumFilter[j];
800 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
802 for (j = 0; j < chrFilterSize; j++) {
803 U += chrUSrc[j][i] * chrFilter[j];
804 V += chrVSrc[j][i] * chrFilter[j];
807 // 8bit: 12+15=27; 16-bit: 12+19=31
813 // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
814 Y1 -= c->yuv2rgb_y_offset;
815 Y2 -= c->yuv2rgb_y_offset;
816 Y1 *= c->yuv2rgb_y_coeff;
817 Y2 *= c->yuv2rgb_y_coeff;
820 // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
822 R = V * c->yuv2rgb_v2r_coeff;
823 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
824 B = U * c->yuv2rgb_u2b_coeff;
826 // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
827 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
828 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
829 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
830 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
831 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
832 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
837 static av_always_inline void
838 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
839 const int32_t *ubuf[2], const int32_t *vbuf[2],
840 const int32_t *abuf[2], uint16_t *dest, int dstW,
841 int yalpha, int uvalpha, int y,
842 enum PixelFormat target)
844 const int32_t *buf0 = buf[0], *buf1 = buf[1],
845 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
846 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
847 int yalpha1 = 4095 - yalpha;
848 int uvalpha1 = 4095 - uvalpha;
851 for (i = 0; i < (dstW >> 1); i++) {
852 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
853 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
854 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
855 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
858 Y1 -= c->yuv2rgb_y_offset;
859 Y2 -= c->yuv2rgb_y_offset;
860 Y1 *= c->yuv2rgb_y_coeff;
861 Y2 *= c->yuv2rgb_y_coeff;
865 R = V * c->yuv2rgb_v2r_coeff;
866 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
867 B = U * c->yuv2rgb_u2b_coeff;
869 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
870 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
871 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
872 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
873 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
874 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
879 static av_always_inline void
880 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
881 const int32_t *ubuf[2], const int32_t *vbuf[2],
882 const int32_t *abuf0, uint16_t *dest, int dstW,
883 int uvalpha, int y, enum PixelFormat target)
885 const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
886 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
889 if (uvalpha < 2048) {
890 for (i = 0; i < (dstW >> 1); i++) {
891 int Y1 = (buf0[i * 2] ) >> 2;
892 int Y2 = (buf0[i * 2 + 1]) >> 2;
893 int U = (ubuf0[i] + (-128 << 11)) >> 2;
894 int V = (vbuf0[i] + (-128 << 11)) >> 2;
897 Y1 -= c->yuv2rgb_y_offset;
898 Y2 -= c->yuv2rgb_y_offset;
899 Y1 *= c->yuv2rgb_y_coeff;
900 Y2 *= c->yuv2rgb_y_coeff;
904 R = V * c->yuv2rgb_v2r_coeff;
905 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
906 B = U * c->yuv2rgb_u2b_coeff;
908 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
909 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
910 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
911 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
912 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
913 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
917 for (i = 0; i < (dstW >> 1); i++) {
918 int Y1 = (buf0[i * 2] ) >> 2;
919 int Y2 = (buf0[i * 2 + 1]) >> 2;
920 int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
921 int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
924 Y1 -= c->yuv2rgb_y_offset;
925 Y2 -= c->yuv2rgb_y_offset;
926 Y1 *= c->yuv2rgb_y_coeff;
927 Y2 *= c->yuv2rgb_y_coeff;
931 R = V * c->yuv2rgb_v2r_coeff;
932 G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
933 B = U * c->yuv2rgb_u2b_coeff;
935 output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
936 output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
937 output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
938 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
939 output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
940 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
950 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
951 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
952 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
953 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
955 static av_always_inline void
956 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
957 int U, int V, int A1, int A2,
958 const void *_r, const void *_g, const void *_b, int y,
959 enum PixelFormat target, int hasAlpha)
961 if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
962 target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
963 uint32_t *dest = (uint32_t *) _dest;
964 const uint32_t *r = (const uint32_t *) _r;
965 const uint32_t *g = (const uint32_t *) _g;
966 const uint32_t *b = (const uint32_t *) _b;
969 int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
971 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
972 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
975 int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
977 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
978 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
980 dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
981 dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
984 } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
985 uint8_t *dest = (uint8_t *) _dest;
986 const uint8_t *r = (const uint8_t *) _r;
987 const uint8_t *g = (const uint8_t *) _g;
988 const uint8_t *b = (const uint8_t *) _b;
990 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
991 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
992 dest[i * 6 + 0] = r_b[Y1];
993 dest[i * 6 + 1] = g[Y1];
994 dest[i * 6 + 2] = b_r[Y1];
995 dest[i * 6 + 3] = r_b[Y2];
996 dest[i * 6 + 4] = g[Y2];
997 dest[i * 6 + 5] = b_r[Y2];
1000 } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
1001 target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
1002 target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
1003 uint16_t *dest = (uint16_t *) _dest;
1004 const uint16_t *r = (const uint16_t *) _r;
1005 const uint16_t *g = (const uint16_t *) _g;
1006 const uint16_t *b = (const uint16_t *) _b;
1007 int dr1, dg1, db1, dr2, dg2, db2;
1009 if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
1010 dr1 = dither_2x2_8[ y & 1 ][0];
1011 dg1 = dither_2x2_4[ y & 1 ][0];
1012 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1013 dr2 = dither_2x2_8[ y & 1 ][1];
1014 dg2 = dither_2x2_4[ y & 1 ][1];
1015 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1016 } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
1017 dr1 = dither_2x2_8[ y & 1 ][0];
1018 dg1 = dither_2x2_8[ y & 1 ][1];
1019 db1 = dither_2x2_8[(y & 1) ^ 1][0];
1020 dr2 = dither_2x2_8[ y & 1 ][1];
1021 dg2 = dither_2x2_8[ y & 1 ][0];
1022 db2 = dither_2x2_8[(y & 1) ^ 1][1];
1024 dr1 = dither_4x4_16[ y & 3 ][0];
1025 dg1 = dither_4x4_16[ y & 3 ][1];
1026 db1 = dither_4x4_16[(y & 3) ^ 3][0];
1027 dr2 = dither_4x4_16[ y & 3 ][1];
1028 dg2 = dither_4x4_16[ y & 3 ][0];
1029 db2 = dither_4x4_16[(y & 3) ^ 3][1];
1032 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1033 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1034 } else /* 8/4-bit */ {
1035 uint8_t *dest = (uint8_t *) _dest;
1036 const uint8_t *r = (const uint8_t *) _r;
1037 const uint8_t *g = (const uint8_t *) _g;
1038 const uint8_t *b = (const uint8_t *) _b;
1039 int dr1, dg1, db1, dr2, dg2, db2;
1041 if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
1042 const uint8_t * const d64 = dither_8x8_73[y & 7];
1043 const uint8_t * const d32 = dither_8x8_32[y & 7];
1044 dr1 = dg1 = d32[(i * 2 + 0) & 7];
1045 db1 = d64[(i * 2 + 0) & 7];
1046 dr2 = dg2 = d32[(i * 2 + 1) & 7];
1047 db2 = d64[(i * 2 + 1) & 7];
1049 const uint8_t * const d64 = dither_8x8_73 [y & 7];
1050 const uint8_t * const d128 = dither_8x8_220[y & 7];
1051 dr1 = db1 = d128[(i * 2 + 0) & 7];
1052 dg1 = d64[(i * 2 + 0) & 7];
1053 dr2 = db2 = d128[(i * 2 + 1) & 7];
1054 dg2 = d64[(i * 2 + 1) & 7];
1057 if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
1058 dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1059 ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1061 dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1062 dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
/**
 * Vertically filter multiple source lines (full "X" filter taps) and write
 * one output line of packed RGB via yuv2rgb_write().  Processes two luma
 * pixels (one chroma pair) per iteration.
 * NOTE(review): several original lines (accumulator declarations with their
 * rounding initializers, braces) were dropped from this excerpt; code kept
 * byte-identical.
 */
1067 static av_always_inline void
1068 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1069 const int16_t **lumSrc, int lumFilterSize,
1070 const int16_t *chrFilter, const int16_t **chrUSrc,
1071 const int16_t **chrVSrc, int chrFilterSize,
1072 const int16_t **alpSrc, uint8_t *dest, int dstW,
1073 int y, enum PixelFormat target, int hasAlpha)
1077 for (i = 0; i < (dstW >> 1); i++) {
1083 int av_unused A1, A2;
1084 const void *r, *g, *b;
/* accumulate the vertical filter taps for the two luma samples */
1086 for (j = 0; j < lumFilterSize; j++) {
1087 Y1 += lumSrc[j][i * 2] * lumFilter[j];
1088 Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
/* ... and for the shared chroma sample */
1090 for (j = 0; j < chrFilterSize; j++) {
1091 U += chrUSrc[j][i] * chrFilter[j];
1092 V += chrVSrc[j][i] * chrFilter[j];
/* clip only when some component overflowed 8 bits (cheap common case) */
1098 if ((Y1 | Y2 | U | V) & 0x100) {
1099 Y1 = av_clip_uint8(Y1);
1100 Y2 = av_clip_uint8(Y2);
1101 U = av_clip_uint8(U);
1102 V = av_clip_uint8(V);
/* alpha path: same vertical filtering for the alpha plane */
1107 for (j = 0; j < lumFilterSize; j++) {
1108 A1 += alpSrc[j][i * 2 ] * lumFilter[j];
1109 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1113 if ((A1 | A2) & 0x100) {
1114 A1 = av_clip_uint8(A1);
1115 A2 = av_clip_uint8(A2);
1119 /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1121 g = (c->table_gU[U] + c->table_gV[V]);
1124 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1125 r, g, b, y, target, hasAlpha);
/**
 * Blend exactly two source lines with 12-bit weights (yalpha/uvalpha in
 * 0..4095) and write one packed-RGB output line.  The >>19 folds the
 * 12-bit blend weight and the 7 fractional bits of the input back to 8 bit.
 */
1129 static av_always_inline void
1130 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1131 const int16_t *ubuf[2], const int16_t *vbuf[2],
1132 const int16_t *abuf[2], uint8_t *dest, int dstW,
1133 int yalpha, int uvalpha, int y,
1134 enum PixelFormat target, int hasAlpha)
1136 const int16_t *buf0 = buf[0], *buf1 = buf[1],
1137 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1138 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1139 *abuf0 = abuf[0], *abuf1 = abuf[1];
/* complementary weights so the two lines always sum to 4095 */
1140 int yalpha1 = 4095 - yalpha;
1141 int uvalpha1 = 4095 - uvalpha;
1144 for (i = 0; i < (dstW >> 1); i++) {
1145 int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
1146 int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
1147 int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
1148 int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
/* palette-style lookup tables built by the yuv2rgb init code */
1150 const void *r = c->table_rV[V],
1151 *g = (c->table_gU[U] + c->table_gV[V]),
1152 *b = c->table_bU[U];
1155 A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
1156 A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1159 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1160 r, g, b, y, target, hasAlpha);
/**
 * Unfiltered single-line output: write one packed-RGB line from one luma
 * buffer.  For chroma, either take the second chroma line directly
 * (uvalpha < 2048, i.e. nearest) or average the two chroma lines.
 * The >>7 drops the fixed-point fraction of the intermediate format.
 */
1164 static av_always_inline void
1165 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1166 const int16_t *ubuf[2], const int16_t *vbuf[2],
1167 const int16_t *abuf0, uint8_t *dest, int dstW,
1168 int uvalpha, int y, enum PixelFormat target,
1171 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1172 *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
/* nearest-neighbour chroma: use the single closest chroma line */
1175 if (uvalpha < 2048) {
1176 for (i = 0; i < (dstW >> 1); i++) {
1177 int Y1 = buf0[i * 2] >> 7;
1178 int Y2 = buf0[i * 2 + 1] >> 7;
1179 int U = ubuf1[i] >> 7;
1180 int V = vbuf1[i] >> 7;
1182 const void *r = c->table_rV[V],
1183 *g = (c->table_gU[U] + c->table_gV[V]),
1184 *b = c->table_bU[U];
1187 A1 = abuf0[i * 2 ] >> 7;
1188 A2 = abuf0[i * 2 + 1] >> 7;
1191 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1192 r, g, b, y, target, hasAlpha);
/* otherwise: average the two chroma lines (>>8 = >>7 plus /2) */
1195 for (i = 0; i < (dstW >> 1); i++) {
1196 int Y1 = buf0[i * 2] >> 7;
1197 int Y2 = buf0[i * 2 + 1] >> 7;
1198 int U = (ubuf0[i] + ubuf1[i]) >> 8;
1199 int V = (vbuf0[i] + vbuf1[i]) >> 8;
1201 const void *r = c->table_rV[V],
1202 *g = (c->table_gU[U] + c->table_gV[V]),
1203 *b = c->table_bU[U];
1206 A1 = abuf0[i * 2 ] >> 7;
1207 A2 = abuf0[i * 2 + 1] >> 7;
1210 yuv2rgb_write(dest, i, Y1, Y2, U, V, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1211 r, g, b, y, target, hasAlpha);
/*
 * Wrapper generators: YUV2RGBWRAPPERX emits the _X_c (multi-tap) entry
 * point for one pixel format; YUV2RGBWRAPPER additionally emits the _2_c
 * (two-line blend) and _1_c (single-line) entry points.  Each wrapper just
 * forwards to the matching *_template with `fmt`/`hasAlpha` baked in as
 * compile-time constants so the templates specialize per format.
 * The instantiations below cover RGB32 variants (with/without alpha),
 * RGB24/BGR24 and the dithered 16/15/12/8/4-bit outputs.
 * NOTE(review): comments are placed only outside the macro bodies because
 * the definitions use line continuations.
 */
1216 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1217 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1218 const int16_t **lumSrc, int lumFilterSize, \
1219 const int16_t *chrFilter, const int16_t **chrUSrc, \
1220 const int16_t **chrVSrc, int chrFilterSize, \
1221 const int16_t **alpSrc, uint8_t *dest, int dstW, \
1224 name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1225 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1226 alpSrc, dest, dstW, y, fmt, hasAlpha); \
1228 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1229 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1230 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1231 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1232 const int16_t *abuf[2], uint8_t *dest, int dstW, \
1233 int yalpha, int uvalpha, int y) \
1235 name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1236 dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1239 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1240 const int16_t *ubuf[2], const int16_t *vbuf[2], \
1241 const int16_t *abuf0, uint8_t *dest, int dstW, \
1242 int uvalpha, int y) \
1244 name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1245 dstW, uvalpha, y, fmt, hasAlpha); \
1249 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1250 YUV2RGBWRAPPER(yuv2rgb,, 32, PIX_FMT_RGB32, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1252 #if CONFIG_SWSCALE_ALPHA
1253 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1);
1254 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1);
1256 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0);
1257 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0);
1259 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0);
1260 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0);
1261 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0);
1262 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0);
1263 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0);
1264 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0);
1265 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0);
1266 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0);
/**
 * Full-chroma-resolution variant: one chroma sample per output pixel
 * (no 4:2:2 pairing), computed arithmetically from the per-context
 * yuv2rgb coefficients instead of lookup tables.  Writes `step` bytes per
 * pixel (3 for RGB24/BGR24, 4 for the 32-bit formats).
 * NOTE(review): the per-format store code between the clip and the
 * trailing alpha stores was dropped from this excerpt.
 */
1268 static av_always_inline void
1269 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1270 const int16_t **lumSrc, int lumFilterSize,
1271 const int16_t *chrFilter, const int16_t **chrUSrc,
1272 const int16_t **chrVSrc, int chrFilterSize,
1273 const int16_t **alpSrc, uint8_t *dest,
1274 int dstW, int y, enum PixelFormat target, int hasAlpha)
1277 int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1279 for (i = 0; i < dstW; i++) {
/* vertical filtering, one full-resolution chroma sample per pixel */
1287 for (j = 0; j < lumFilterSize; j++) {
1288 Y += lumSrc[j][i] * lumFilter[j];
1290 for (j = 0; j < chrFilterSize; j++) {
1291 U += chrUSrc[j][i] * chrFilter[j];
1292 V += chrVSrc[j][i] * chrFilter[j];
1299 for (j = 0; j < lumFilterSize; j++) {
1300 A += alpSrc[j][i] * lumFilter[j];
1304 A = av_clip_uint8(A);
/* arithmetic YUV->RGB using the context's fixed-point coefficients */
1306 Y -= c->yuv2rgb_y_offset;
1307 Y *= c->yuv2rgb_y_coeff;
1309 R = Y + V*c->yuv2rgb_v2r_coeff;
1310 G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1311 B = Y + U*c->yuv2rgb_u2b_coeff;
/* clip to 30 bits only when an intermediate went out of range */
1312 if ((R | G | B) & 0xC0000000) {
1313 R = av_clip_uintp2(R, 30);
1314 G = av_clip_uintp2(G, 30);
1315 B = av_clip_uintp2(B, 30);
/* alpha byte position depends on whether alpha leads or trails */
1320 dest[0] = hasAlpha ? A : 255;
1334 dest[3] = hasAlpha ? A : 255;
1337 dest[0] = hasAlpha ? A : 255;
1352 dest[3] = hasAlpha ? A : 255;
/*
 * Instantiate the full-chroma "X" writers for the four 32-bit channel
 * orders (alpha-enabled, forced-alpha, no-alpha variants) and for 24-bit
 * RGB/BGR.  Only _X_c entry points exist for the full-chroma path.
 */
1360 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1361 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1362 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1363 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf);
1365 #if CONFIG_SWSCALE_ALPHA
1366 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1);
1367 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1);
1368 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1);
1369 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1);
1371 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0);
1372 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0);
1373 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0);
1374 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0);
1376 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0);
1377 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0);
/**
 * Fill a horizontal band of one image plane with a constant byte value.
 *
 * @param plane  base pointer of the plane
 * @param stride bytes per row
 * @param width  number of bytes to set in each row
 * @param height number of rows to fill
 * @param y      first row to fill
 * @param val    byte value written into every touched position
 */
static av_always_inline void fillPlane(uint8_t *plane, int stride,
                                       int width, int height,
                                       int y, uint8_t val)
{
    /* start at the first byte of row y */
    uint8_t *row = plane + stride * y;
    int line;

    for (line = 0; line < height; line++, row += stride)
        memset(row, val, width);
}
/*
 * Helpers for the 48-bit (16 bit/component) RGB input readers below:
 * input_pixel loads one 16-bit component honoring the format's endianness;
 * r/b swap the first/last component for the BGR48 layouts so the same
 * template serves both RGB48 and BGR48.
 */
1391 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1393 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1394 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
/**
 * Convert one line of 48-bit RGB/BGR to 16-bit luma.
 * The local names r_b/g/b_r are fixed: the file-scope r/b macros above
 * expand to them to select the correct channel order per format.
 */
1396 static av_always_inline void
1397 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1398 enum PixelFormat origin)
1401 for (i = 0; i < width; i++) {
1402 unsigned int r_b = input_pixel(&src[i*3+0]);
1403 unsigned int g = input_pixel(&src[i*3+1]);
1404 unsigned int b_r = input_pixel(&src[i*3+2]);
/* fixed-point BT.601 luma with rounding offset */
1406 dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/**
 * Convert one line of 48-bit RGB/BGR to 16-bit chroma (full horizontal
 * chroma resolution).  src2 is unused here; chroma is taken from src1.
 */
1410 static av_always_inline void
1411 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1412 const uint16_t *src1, const uint16_t *src2,
1413 int width, enum PixelFormat origin)
1417 for (i = 0; i < width; i++) {
1418 int r_b = input_pixel(&src1[i*3+0]);
1419 int g = input_pixel(&src1[i*3+1]);
1420 int b_r = input_pixel(&src1[i*3+2]);
1422 dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1423 dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/**
 * As rgb48ToUV_c_template, but horizontally average each pair of input
 * pixels first (half-resolution chroma for 4:2:x subsampled output).
 */
1427 static av_always_inline void
1428 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1429 const uint16_t *src1, const uint16_t *src2,
1430 int width, enum PixelFormat origin)
1434 for (i = 0; i < width; i++) {
/* rounded average of two neighbouring pixels per component */
1435 int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1436 int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1437 int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1439 dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1440 dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
/*
 * rgb48funcs generates the public uint8_t* entry points (ToY / ToUV /
 * ToUV_half) for one 48-bit format by casting the byte pointers to
 * uint16_t* and calling the templates above; instantiated for
 * RGB48LE/BE and BGR48LE/BE.  The trailing input_pixel redefinition
 * serves the 16/32-bit packed-RGB templates that follow: 32-bit formats
 * are loaded as native 32-bit words, 16-bit ones endianness-aware.
 * NOTE(review): comments only outside the macro (line continuations).
 */
1448 #define rgb48funcs(pattern, BE_LE, origin) \
1449 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1450 int width, uint32_t *unused) \
1452 const uint16_t *src = (const uint16_t *) _src; \
1453 uint16_t *dst = (uint16_t *) _dst; \
1454 rgb48ToY_c_template(dst, src, width, origin); \
1457 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1458 const uint8_t *_src1, const uint8_t *_src2, \
1459 int width, uint32_t *unused) \
1461 const uint16_t *src1 = (const uint16_t *) _src1, \
1462 *src2 = (const uint16_t *) _src2; \
1463 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1464 rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1467 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1468 const uint8_t *_src1, const uint8_t *_src2, \
1469 int width, uint32_t *unused) \
1471 const uint16_t *src1 = (const uint16_t *) _src1, \
1472 *src2 = (const uint16_t *) _src2; \
1473 uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1474 rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1477 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
1478 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
1479 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
1480 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
1482 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1483 origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1484 (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
/**
 * Generic 16/32-bit packed RGB -> 8-bit luma.  Channel extraction is
 * parameterized by shift/mask pairs; rsh/gsh/bsh pre-scale the BT.601
 * coefficients so narrow channels (5/6 bit) reach full precision, and S
 * is the total fixed-point shift.
 */
1486 static av_always_inline void
1487 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1488 int width, enum PixelFormat origin,
1489 int shr, int shg, int shb, int shp,
1490 int maskr, int maskg, int maskb,
1491 int rsh, int gsh, int bsh, int S)
1493 const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
1494 rnd = 33 << (S - 1);
1497 for (i = 0; i < width; i++) {
1498 int px = input_pixel(i) >> shp;
1499 int b = (px & maskb) >> shb;
1500 int g = (px & maskg) >> shg;
1501 int r = (px & maskr) >> shr;
1503 dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
/**
 * Generic 16/32-bit packed RGB -> 8-bit chroma at full horizontal
 * resolution; same shift/mask parameterization as the luma template.
 */
1507 static av_always_inline void
1508 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1509 const uint8_t *src, int width,
1510 enum PixelFormat origin,
1511 int shr, int shg, int shb, int shp,
1512 int maskr, int maskg, int maskb,
1513 int rsh, int gsh, int bsh, int S)
1515 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1516 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1517 rnd = 257 << (S - 1);
1520 for (i = 0; i < width; i++) {
1521 int px = input_pixel(i) >> shp;
1522 int b = (px & maskb) >> shb;
1523 int g = (px & maskg) >> shg;
1524 int r = (px & maskr) >> shr;
1526 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1527 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
/**
 * Half-resolution chroma variant: sums two adjacent pixels inside the
 * packed word domain (the masks are widened by one bit so the summed
 * red+blue fields cannot carry into each other), then extracts channels.
 * NOTE(review): the green handling for 565-style formats between lines
 * 1553 and 1556 was dropped from this excerpt.
 */
1531 static av_always_inline void
1532 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1533 const uint8_t *src, int width,
1534 enum PixelFormat origin,
1535 int shr, int shg, int shb, int shp,
1536 int maskr, int maskg, int maskb,
1537 int rsh, int gsh, int bsh, int S)
1539 const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1540 rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1541 rnd = 257 << S, maskgx = ~(maskr | maskb);
/* widen each mask so a two-pixel sum fits without overflowing the field */
1544 maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1545 for (i = 0; i < width; i++) {
1546 int px0 = input_pixel(2 * i + 0) >> shp;
1547 int px1 = input_pixel(2 * i + 1) >> shp;
1548 int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1549 int rb = px0 + px1 - g;
1551 b = (rb & maskb) >> shb;
1552 if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1553 origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1556 g = (g & maskg) >> shg;
1558 r = (rb & maskr) >> shr;
1560 dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1561 dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
/*
 * rgb16_32_wrapper generates the three public converters (ToY / ToUV /
 * ToUV_half) for one packed-RGB format by forwarding its shift/mask
 * description to the templates above.  Instantiations cover the 32-bit
 * orders (with and without the 8-bit pre-shift for the _1 variants) and
 * all 565/555 little/big-endian layouts; S encodes the per-format
 * fixed-point normalization.
 */
1567 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1568 maskg, maskb, rsh, gsh, bsh, S) \
1569 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1570 int width, uint32_t *unused) \
1572 rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1573 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1576 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1577 const uint8_t *src, const uint8_t *dummy, \
1578 int width, uint32_t *unused) \
1580 rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1581 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1584 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1585 const uint8_t *src, const uint8_t *dummy, \
1586 int width, uint32_t *unused) \
1588 rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1589 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1592 rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1593 rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
1594 rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1595 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
1596 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1597 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1598 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1599 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
1600 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
1601 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
1602 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
1603 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
/* Extract the alpha plane from packed ABGR.  NOTE(review): the loop body
 * was dropped from this excerpt; presumably dst[i] = src[4*i] (alpha is
 * the leading byte in ABGR) — confirm against upstream. */
1605 static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1608 for (i=0; i<width; i++) {
/* Extract the alpha plane from packed RGBA.  NOTE(review): loop body
 * dropped from this excerpt; presumably dst[i] = src[4*i+3] (alpha is the
 * trailing byte in RGBA) — confirm against upstream. */
1613 static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1616 for (i=0; i<width; i++) {
/* PAL8 -> luma: look each index up in the 32-bit palette and keep the low
 * byte (the palette stores Y in bits 0-7 here). */
1621 static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1624 for (i=0; i<width; i++) {
1627 dst[i]= pal[d] & 0xFF;
/* PAL8 -> chroma: both chroma pointers must reference the same index line;
 * U/V are unpacked from the upper bytes of the palette entry.
 * NOTE(review): the dstU/dstV stores were dropped from this excerpt. */
1631 static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1632 const uint8_t *src1, const uint8_t *src2,
1633 int width, uint32_t *pal)
1636 assert(src1 == src2);
1637 for (i=0; i<width; i++) {
1638 int p= pal[src1[i]];
/* 1 bpp, white=0 -> 8-bit luma: expand each bit (MSB first) to 0/255.
 * NOTE(review): the byte load/inversion and the trailing width%8 handling
 * were dropped from this excerpt. */
1645 static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1646 int width, uint32_t *unused)
1649 for (i=0; i<width/8; i++) {
1652 dst[8*i+j]= ((d>>(7-j))&1)*255;
/* 1 bpp, black=0 -> 8-bit luma: same bit expansion as monowhite2Y_c but
 * without inverting the source byte.  NOTE(review): byte load and tail
 * handling dropped from this excerpt. */
1656 static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1657 int width, uint32_t *unused)
1660 for (i=0; i<width/8; i++) {
1663 dst[8*i+j]= ((d>>(7-j))&1)*255;
1667 //FIXME yuy2* can read up to 7 samples too much
/* YUYV -> luma: take every even byte.  NOTE(review): the loop body was
 * dropped from this excerpt; presumably dst[i] = src[2*i] — confirm. */
1669 static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1673 for (i=0; i<width; i++)
/**
 * De-interleave the chroma of one YUYV (YUY2) line.
 * Layout per pixel pair is Y0 U Y1 V, so U/V live at byte offsets 1 and 3.
 * Both source pointers must alias the same line.
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;
        dstU[n] = quad[1];
        dstV[n] = quad[3];
    }
    assert(src1 == src2);
}
/**
 * Byte-swap one line of 16-bit luma samples (endianness conversion).
 * The byte pointers are reinterpreted as 16-bit sample arrays.
 */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    const uint16_t *in  = (const uint16_t *) _src;
    uint16_t       *out = (uint16_t *) _dst;
    int n;

    for (n = 0; n < width; n++)
        out[n] = av_bswap16(in[n]);
}
/**
 * Byte-swap one line each of 16-bit U and V samples (endianness
 * conversion); the two planes are processed in a single pass.
 */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *inU = (const uint16_t *) _src1;
    const uint16_t *inV = (const uint16_t *) _src2;
    uint16_t *outU = (uint16_t *) _dstU;
    uint16_t *outV = (uint16_t *) _dstV;
    int n;

    for (n = 0; n < width; n++) {
        outU[n] = av_bswap16(inU[n]);
        outV[n] = av_bswap16(inV[n]);
    }
}
1711 /* This is almost identical to the previous, and exists only because
1712 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
/* UYVY -> luma: take every odd byte.  NOTE(review): the loop body was
 * dropped from this excerpt; presumably dst[i] = src[2*i+1] — confirm. */
1713 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1717 for (i=0; i<width; i++)
/**
 * De-interleave the chroma of one UYVY line.
 * Layout per pixel pair is U Y0 V Y1, so U/V live at byte offsets 0 and 2.
 * Both source pointers must alias the same line.
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;
        dstU[n] = quad[0];
        dstV[n] = quad[2];
    }
    assert(src1 == src2);
}
/**
 * Split one line of interleaved two-channel bytes (NV12/NV21 chroma) into
 * two separate planes: even bytes go to dst1, odd bytes to dst2.
 */
static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                                        const uint8_t *src, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        dst1[n] = src[2 * n];
        dst2[n] = src[2 * n + 1];
    }
}
/* NV12 chroma: interleaved bytes are U,V — forward to the splitter with
 * U first. */
1742 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1743 const uint8_t *src1, const uint8_t *src2,
1744 int width, uint32_t *unused)
1746 nvXXtoUV_c(dstU, dstV, src1, width);
/* NV21 chroma: interleaved bytes are V,U — same splitter, destinations
 * swapped. */
1749 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1750 const uint8_t *src1, const uint8_t *src2,
1751 int width, uint32_t *unused)
1753 nvXXtoUV_c(dstV, dstU, src1, width);
/* Endianness-aware 16-bit load helper for the converters below. */
1756 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
/* Packed BGR24 -> 8-bit luma (fixed-point BT.601).  NOTE(review): the
 * b/g/r byte loads were dropped from this excerpt; presumably
 * b=src[3i], g=src[3i+1], r=src[3i+2] — confirm. */
1758 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1759 int width, uint32_t *unused)
1762 for (i=0; i<width; i++) {
1767 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/**
 * Packed BGR24 -> 8-bit U/V at full horizontal chroma resolution,
 * fixed-point BT.601 with a rounding offset that also recentres chroma
 * around 128.  Both source pointers must alias the same line.
 */
static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *px = src1 + 3 * n;   /* packed B, G, R */
        int b = px[0];
        int g = px[1];
        int r = px[2];

        dstU[n] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
        dstV[n] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
    }
    assert(src1 == src2);
}
/**
 * Packed BGR24 -> U/V at half horizontal resolution: each output sample
 * is computed from the sum of two adjacent pixels (hence >>(SHIFT+1)).
 */
1786 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1787 const uint8_t *src2, int width, uint32_t *unused)
1790 for (i=0; i<width; i++) {
1791 int b= src1[6*i + 0] + src1[6*i + 3];
1792 int g= src1[6*i + 1] + src1[6*i + 4];
1793 int r= src1[6*i + 2] + src1[6*i + 5];
1795 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1796 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1798 assert(src1 == src2);
/* Packed RGB24 -> 8-bit luma.  NOTE(review): the r/g/b byte loads were
 * dropped from this excerpt; presumably r=src[3i], g=src[3i+1],
 * b=src[3i+2] — confirm. */
1801 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1805 for (i=0; i<width; i++) {
1810 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
/**
 * Packed RGB24 -> 8-bit U/V at full horizontal chroma resolution.
 * Mirror image of bgr24ToUV_c with the channel order reversed.
 */
1814 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1815 const uint8_t *src2, int width, uint32_t *unused)
1819 for (i=0; i<width; i++) {
1820 int r= src1[3*i + 0];
1821 int g= src1[3*i + 1];
1822 int b= src1[3*i + 2];
1824 dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1825 dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
/**
 * Packed RGB24 -> U/V at half horizontal resolution (two-pixel sums,
 * hence >>(SHIFT+1)); channel order reversed vs. bgr24ToUV_half_c.
 */
1829 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1830 const uint8_t *src2, int width, uint32_t *unused)
1834 for (i=0; i<width; i++) {
1835 int r= src1[6*i + 0] + src1[6*i + 3];
1836 int g= src1[6*i + 1] + src1[6*i + 4];
1837 int b= src1[6*i + 2] + src1[6*i + 5];
1839 dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1840 dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
/**
 * Horizontal scaling for >8-bit input: apply the precomputed FIR filter
 * at each destination position over 16-bit source samples, producing a
 * 19-bit intermediate in a 32-bit destination.  `sh` adapts the
 * normalization to the source bit depth.
 */
1844 static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1845 const int16_t *filter,
1846 const int16_t *filterPos, int filterSize)
1849 int32_t *dst = (int32_t *) _dst;
1850 const uint16_t *src = (const uint16_t *) _src;
1851 int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1852 int sh = (bits <= 7) ? 11 : (bits - 4);
1854 for (i = 0; i < dstW; i++) {
1856 int srcPos = filterPos[i];
1857 unsigned int val = 0;
1859 for (j = 0; j < filterSize; j++) {
1860 val += src[srcPos + j] * filter[filterSize * i + j];
1862 // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1863 dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1867 // bilinear / bicubic scaling
/**
 * Generic horizontal scaling for 8-bit input: FIR filter per destination
 * sample, 7-bit normalization, clamped to the 15-bit intermediate range
 * because bicubic taps can overshoot.
 */
1868 static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1869 const int16_t *filter, const int16_t *filterPos,
1873 for (i=0; i<dstW; i++) {
1875 int srcPos= filterPos[i];
1877 for (j=0; j<filterSize; j++) {
1878 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1880 //filter += hFilterSize;
1881 dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/**
 * Expand limited-range (MPEG) chroma to full-range (JPEG) in the 15-bit
 * intermediate domain.  Inputs are clamped first so the fixed-point
 * multiply cannot overflow 32 bits.
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        int u = FFMIN(dstU[n], 30775);
        int v = FFMIN(dstV[n], 30775);
        dstU[n] = (u * 4663 - 9289992) >> 12; // maps limited black to ~-264
        dstV[n] = (v * 4663 - 9289992) >> 12;
    }
}
/**
 * Compress full-range (JPEG) chroma to limited-range (MPEG) in the
 * 15-bit intermediate domain; inverse of chrRangeToJpeg_c.
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = (dstU[n] * 1799 + 4081085) >> 11; // maps 0 toward ~1469/2048 offset
        dstV[n] = (dstV[n] * 1799 + 4081085) >> 11;
    }
}
/**
 * Expand limited-range (MPEG) luma to full-range (JPEG) in the 15-bit
 * intermediate domain; input is clamped to avoid 32-bit overflow.
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = (FFMIN(dst[n], 30189) * 19077 - 39057361) >> 14;
}
/**
 * Compress full-range (JPEG) luma to limited-range (MPEG) in the 15-bit
 * intermediate domain; inverse of lumRangeToJpeg_c.
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = (dst[n] * 14071 + 33561947) >> 14;
}
/**
 * 16-bit-path variant of chrRangeToJpeg_c: the int16_t buffers actually
 * hold 32-bit samples (4 extra fractional bits), hence the <<4 on the
 * clamp limit and offset.
 */
1917 static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1920 int32_t *dstU = (int32_t *) _dstU;
1921 int32_t *dstV = (int32_t *) _dstV;
1922 for (i = 0; i < width; i++) {
1923 dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1924 dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
/**
 * 16-bit-path variant of chrRangeFromJpeg_c (buffers hold 32-bit samples
 * with 4 extra fractional bits, hence the <<4 on the offset).
 */
1927 static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1930 int32_t *dstU = (int32_t *) _dstU;
1931 int32_t *dstV = (int32_t *) _dstV;
1932 for (i = 0; i < width; i++) {
1933 dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
1934 dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
/**
 * 16-bit-path variant of lumRangeToJpeg_c; coefficient/offset are scaled
 * for the 4 extra fractional bits of the 32-bit intermediate samples.
 */
1937 static void lumRangeToJpeg16_c(int16_t *_dst, int width)
1940 int32_t *dst = (int32_t *) _dst;
1941 for (i = 0; i < width; i++)
1942 dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
/**
 * 16-bit-path variant of lumRangeFromJpeg_c (offset scaled by <<4 for the
 * extra fractional bits of the 32-bit intermediate samples).
 */
1944 static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
1947 int32_t *dst = (int32_t *) _dst;
1948 for (i = 0; i < width; i++)
1949 dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
/**
 * Fast bilinear horizontal luma scaler ("crap downscale / fast upscale").
 * Source position runs in 16.16 fixed point; the blend factor uses the
 * top 7 fractional bits, and the output keeps 7 fractional bits.
 *
 * Fix: as given, the loop never advanced `xpos`, so every output sample
 * was interpolated at source position 0; restore the per-pixel
 * `xpos += xInc` advance.
 */
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                           const uint8_t *src, int srcW, int xInc)
{
    int i;
    unsigned int xpos = 0;

    for (i = 0; i < dstWidth; i++) {
        register unsigned int xx     = xpos >> 16;            /* integer source index */
        register unsigned int xalpha = (xpos & 0xFFFF) >> 9;  /* 7-bit blend weight */
        dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
        xpos += xInc;  /* advance the 16.16 source position */
    }
}
/**
 * Widen 8-bit samples to 16 bit by byte replication (x -> x*257 as raw
 * bytes).  Iterates from the end so the conversion is safe when the
 * destination aliases the source buffer ("Rv" = reverse).
 */
static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
{
    uint8_t *out = (uint8_t *) _dst;
    int n;

    for (n = len - 1; n >= 0; n--) {
        uint8_t s = src[n];
        out[2 * n]     = s;
        out[2 * n + 1] = s;
    }
}
/**
 * Narrow 19-bit intermediate samples (stored as int32) to the 15-bit
 * int16 domain by dropping 4 fractional bits.  Forward iteration ("Fw")
 * keeps each write at or before its read, so in-place use is safe.
 */
static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
{
    int n;

    for (n = 0; n < len; n++)
        dst[n] = src[n] >> 4;
}
1982 // *** horizontal scale Y line to temp buffer
/**
 * Full horizontal luma/alpha pipeline for one line: optional input
 * conversion to planar YV12, optional 8->16 bit widening, horizontal
 * scaling (filtered or fast-bilinear), optional range conversion and
 * optional 19->15 bit narrowing — each step via context function pointers.
 */
1983 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
1984 const uint8_t *src, int srcW, int xInc,
1985 const int16_t *hLumFilter,
1986 const int16_t *hLumFilterPos, int hLumFilterSize,
1987 uint8_t *formatConvBuffer,
1988 uint32_t *pal, int isAlpha)
/* pick luma or alpha converters; alpha never gets range conversion */
1990 void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
1991 void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
1994 toYV12(formatConvBuffer, src, srcW, pal);
1995 src= formatConvBuffer;
/* widen shallow inputs when scaling is done at 16 bpp */
1998 if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
1999 c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
2000 src = formatConvBuffer;
2003 if (!c->hyscale_fast) {
2004 c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2005 } else { // fast bilinear upscale / crap downscale
2006 c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2010 convertRange(dst, dstWidth);
2012 if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2013 c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
/**
 * Fast bilinear horizontal chroma scaler: processes the U and V planes in
 * lockstep with a shared 16.16 source position.  Note the weighting uses
 * (xalpha ^ 127) rather than (128 - xalpha), so the two weights sum to
 * 127; the output keeps 7 fractional bits.
 *
 * Fix: as given, the loop never advanced `xpos`, so every output sample
 * was interpolated at source position 0; restore the per-pixel
 * `xpos += xInc` advance.
 */
static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                           int dstWidth, const uint8_t *src1,
                           const uint8_t *src2, int srcW, int xInc)
{
    int i;
    unsigned int xpos = 0;

    for (i = 0; i < dstWidth; i++) {
        register unsigned int xx     = xpos >> 16;
        register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
        dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
        dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
        xpos += xInc;  /* advance the 16.16 source position */
    }
}
/**
 * Full horizontal chroma pipeline for one line: optional packed->planar
 * chroma conversion into formatConvBuffer (+ an aligned second half for
 * the V plane), optional 8->16 bit widening, horizontal scaling (filtered
 * or fast-bilinear), optional range conversion and 19->15 bit narrowing.
 */
2032 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2033 const uint8_t *src1, const uint8_t *src2,
2034 int srcW, int xInc, const int16_t *hChrFilter,
2035 const int16_t *hChrFilterPos, int hChrFilterSize,
2036 uint8_t *formatConvBuffer, uint32_t *pal)
/* second plane goes into the 16-byte-aligned upper half of the buffer */
2039 uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16);
2040 c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2041 src1= formatConvBuffer;
2045 if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2046 uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2, 16));
2047 c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
2048 c->scale8To16Rv((uint16_t *) buf2, src2, srcW);
2049 src1 = formatConvBuffer;
2053 if (!c->hcscale_fast) {
2054 c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2055 c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2056 } else { // fast bilinear upscale / crap downscale
2057 c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2060 if (c->chrConvertRange)
2061 c->chrConvertRange(dst1, dst2, dstWidth);
2063 if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
2064 c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
2065 c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
/**
 * Select the C output functions (planar yuv2yuv1/yuv2yuvX and packed
 * 1/2/X writers) matching the destination pixel format.  The full-chroma
 * branch (SWS_FULL_CHR_H_INT) only provides _X_c writers; the normal
 * branch fills all three variants per format.
 * NOTE(review): many case labels and #if CONFIG_SMALL alternatives were
 * dropped from this excerpt; code kept byte-identical.
 */
2069 static av_always_inline void
2070 find_c_packed_planar_out_funcs(SwsContext *c,
2071 yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
2072 yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2073 yuv2packedX_fn *yuv2packedX)
2075 enum PixelFormat dstFormat = c->dstFormat;
/* planar outputs: pick by bit depth and endianness */
2077 if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
2078 *yuv2yuvX = yuv2nv12X_c;
2079 } else if (is16BPS(dstFormat)) {
2080 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
2081 } else if (is9_OR_10BPS(dstFormat)) {
2082 if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2083 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
2085 *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
2088 *yuv2yuv1 = yuv2yuv1_c;
2089 *yuv2yuvX = yuv2yuvX_c;
/* full horizontal chroma interpolation: only _X_c writers exist */
2091 if(c->flags & SWS_FULL_CHR_H_INT) {
2092 switch (dstFormat) {
2095 *yuv2packedX = yuv2rgba32_full_X_c;
2097 #if CONFIG_SWSCALE_ALPHA
2099 *yuv2packedX = yuv2rgba32_full_X_c;
2101 #endif /* CONFIG_SWSCALE_ALPHA */
2103 *yuv2packedX = yuv2rgbx32_full_X_c;
2105 #endif /* !CONFIG_SMALL */
2109 *yuv2packedX = yuv2argb32_full_X_c;
2111 #if CONFIG_SWSCALE_ALPHA
2113 *yuv2packedX = yuv2argb32_full_X_c;
2115 #endif /* CONFIG_SWSCALE_ALPHA */
2117 *yuv2packedX = yuv2xrgb32_full_X_c;
2119 #endif /* !CONFIG_SMALL */
2123 *yuv2packedX = yuv2bgra32_full_X_c;
2125 #if CONFIG_SWSCALE_ALPHA
2127 *yuv2packedX = yuv2bgra32_full_X_c;
2129 #endif /* CONFIG_SWSCALE_ALPHA */
2131 *yuv2packedX = yuv2bgrx32_full_X_c;
2133 #endif /* !CONFIG_SMALL */
2137 *yuv2packedX = yuv2abgr32_full_X_c;
2139 #if CONFIG_SWSCALE_ALPHA
2141 *yuv2packedX = yuv2abgr32_full_X_c;
2143 #endif /* CONFIG_SWSCALE_ALPHA */
2145 *yuv2packedX = yuv2xbgr32_full_X_c;
2147 #endif /* !CONFIG_SMALL */
2150 *yuv2packedX = yuv2rgb24_full_X_c;
2153 *yuv2packedX = yuv2bgr24_full_X_c;
/* normal path: all three writer variants per destination format */
2157 switch (dstFormat) {
2158 case PIX_FMT_GRAY16BE:
2159 *yuv2packed1 = yuv2gray16BE_1_c;
2160 *yuv2packed2 = yuv2gray16BE_2_c;
2161 *yuv2packedX = yuv2gray16BE_X_c;
2163 case PIX_FMT_GRAY16LE:
2164 *yuv2packed1 = yuv2gray16LE_1_c;
2165 *yuv2packed2 = yuv2gray16LE_2_c;
2166 *yuv2packedX = yuv2gray16LE_X_c;
2168 case PIX_FMT_MONOWHITE:
2169 *yuv2packed1 = yuv2monowhite_1_c;
2170 *yuv2packed2 = yuv2monowhite_2_c;
2171 *yuv2packedX = yuv2monowhite_X_c;
2173 case PIX_FMT_MONOBLACK:
2174 *yuv2packed1 = yuv2monoblack_1_c;
2175 *yuv2packed2 = yuv2monoblack_2_c;
2176 *yuv2packedX = yuv2monoblack_X_c;
2178 case PIX_FMT_YUYV422:
2179 *yuv2packed1 = yuv2yuyv422_1_c;
2180 *yuv2packed2 = yuv2yuyv422_2_c;
2181 *yuv2packedX = yuv2yuyv422_X_c;
2183 case PIX_FMT_UYVY422:
2184 *yuv2packed1 = yuv2uyvy422_1_c;
2185 *yuv2packed2 = yuv2uyvy422_2_c;
2186 *yuv2packedX = yuv2uyvy422_X_c;
2188 case PIX_FMT_RGB48LE:
2189 *yuv2packed1 = yuv2rgb48le_1_c;
2190 *yuv2packed2 = yuv2rgb48le_2_c;
2191 *yuv2packedX = yuv2rgb48le_X_c;
2193 case PIX_FMT_RGB48BE:
2194 *yuv2packed1 = yuv2rgb48be_1_c;
2195 *yuv2packed2 = yuv2rgb48be_2_c;
2196 *yuv2packedX = yuv2rgb48be_X_c;
2198 case PIX_FMT_BGR48LE:
2199 *yuv2packed1 = yuv2bgr48le_1_c;
2200 *yuv2packed2 = yuv2bgr48le_2_c;
2201 *yuv2packedX = yuv2bgr48le_X_c;
2203 case PIX_FMT_BGR48BE:
2204 *yuv2packed1 = yuv2bgr48be_1_c;
2205 *yuv2packed2 = yuv2bgr48be_2_c;
2206 *yuv2packedX = yuv2bgr48be_X_c;
2211 *yuv2packed1 = yuv2rgb32_1_c;
2212 *yuv2packed2 = yuv2rgb32_2_c;
2213 *yuv2packedX = yuv2rgb32_X_c;
2215 #if CONFIG_SWSCALE_ALPHA
2217 *yuv2packed1 = yuv2rgba32_1_c;
2218 *yuv2packed2 = yuv2rgba32_2_c;
2219 *yuv2packedX = yuv2rgba32_X_c;
2221 #endif /* CONFIG_SWSCALE_ALPHA */
2223 *yuv2packed1 = yuv2rgbx32_1_c;
2224 *yuv2packed2 = yuv2rgbx32_2_c;
2225 *yuv2packedX = yuv2rgbx32_X_c;
2227 #endif /* !CONFIG_SMALL */
2229 case PIX_FMT_RGB32_1:
2230 case PIX_FMT_BGR32_1:
2232 *yuv2packed1 = yuv2rgb32_1_1_c;
2233 *yuv2packed2 = yuv2rgb32_1_2_c;
2234 *yuv2packedX = yuv2rgb32_1_X_c;
2236 #if CONFIG_SWSCALE_ALPHA
2238 *yuv2packed1 = yuv2rgba32_1_1_c;
2239 *yuv2packed2 = yuv2rgba32_1_2_c;
2240 *yuv2packedX = yuv2rgba32_1_X_c;
2242 #endif /* CONFIG_SWSCALE_ALPHA */
2244 *yuv2packed1 = yuv2rgbx32_1_1_c;
2245 *yuv2packed2 = yuv2rgbx32_1_2_c;
2246 *yuv2packedX = yuv2rgbx32_1_X_c;
2248 #endif /* !CONFIG_SMALL */
2251 *yuv2packed1 = yuv2rgb24_1_c;
2252 *yuv2packed2 = yuv2rgb24_2_c;
2253 *yuv2packedX = yuv2rgb24_X_c;
2256 *yuv2packed1 = yuv2bgr24_1_c;
2257 *yuv2packed2 = yuv2bgr24_2_c;
2258 *yuv2packedX = yuv2bgr24_X_c;
2260 case PIX_FMT_RGB565LE:
2261 case PIX_FMT_RGB565BE:
2262 case PIX_FMT_BGR565LE:
2263 case PIX_FMT_BGR565BE:
2264 *yuv2packed1 = yuv2rgb16_1_c;
2265 *yuv2packed2 = yuv2rgb16_2_c;
2266 *yuv2packedX = yuv2rgb16_X_c;
2268 case PIX_FMT_RGB555LE:
2269 case PIX_FMT_RGB555BE:
2270 case PIX_FMT_BGR555LE:
2271 case PIX_FMT_BGR555BE:
2272 *yuv2packed1 = yuv2rgb15_1_c;
2273 *yuv2packed2 = yuv2rgb15_2_c;
2274 *yuv2packedX = yuv2rgb15_X_c;
2276 case PIX_FMT_RGB444LE:
2277 case PIX_FMT_RGB444BE:
2278 case PIX_FMT_BGR444LE:
2279 case PIX_FMT_BGR444BE:
2280 *yuv2packed1 = yuv2rgb12_1_c;
2281 *yuv2packed2 = yuv2rgb12_2_c;
2282 *yuv2packedX = yuv2rgb12_X_c;
2286 *yuv2packed1 = yuv2rgb8_1_c;
2287 *yuv2packed2 = yuv2rgb8_2_c;
2288 *yuv2packedX = yuv2rgb8_X_c;
2292 *yuv2packed1 = yuv2rgb4_1_c;
2293 *yuv2packed2 = yuv2rgb4_2_c;
2294 *yuv2packedX = yuv2rgb4_X_c;
2296 case PIX_FMT_RGB4_BYTE:
2297 case PIX_FMT_BGR4_BYTE:
2298 *yuv2packed1 = yuv2rgb4b_1_c;
2299 *yuv2packed2 = yuv2rgb4b_2_c;
2300 *yuv2packedX = yuv2rgb4b_X_c;
2306 #define DEBUG_SWSCALE_BUFFERS 0
2307 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
2309 static int swScale(SwsContext *c, const uint8_t* src[],
2310 int srcStride[], int srcSliceY,
2311 int srcSliceH, uint8_t* dst[], int dstStride[])
/* Main C scaling entry point: consumes one horizontal slice of the source
 * picture (srcSliceH lines starting at srcSliceY) and writes as many
 * destination lines as the vertical filters allow.  Horizontally scaled
 * input lines are cached in per-plane ring buffers (lumPixBuf/chrUPixBuf/
 * chrVPixBuf/alpPixBuf) so that the vertical filter can span slice
 * boundaries.  Returns the number of destination lines produced by this
 * call (dstY - lastDstY).
 * NOTE(review): several declarations (e.g. dstY, lastDstY, enough_lines)
 * are not visible in this extraction of the file — confirm against the
 * full source before editing. */
2313 /* load a few things into local vars to make the code more readable? and faster */
2314 const int srcW= c->srcW;
2315 const int dstW= c->dstW;
2316 const int dstH= c->dstH;
2317 const int chrDstW= c->chrDstW;
2318 const int chrSrcW= c->chrSrcW;
2319 const int lumXInc= c->lumXInc;
2320 const int chrXInc= c->chrXInc;
2321 const enum PixelFormat dstFormat= c->dstFormat;
2322 const int flags= c->flags;
// Vertical/horizontal filter coefficient tables and per-line positions.
2323 int16_t *vLumFilterPos= c->vLumFilterPos;
2324 int16_t *vChrFilterPos= c->vChrFilterPos;
2325 int16_t *hLumFilterPos= c->hLumFilterPos;
2326 int16_t *hChrFilterPos= c->hChrFilterPos;
2327 int16_t *vLumFilter= c->vLumFilter;
2328 int16_t *vChrFilter= c->vChrFilter;
2329 int16_t *hLumFilter= c->hLumFilter;
2330 int16_t *hChrFilter= c->hChrFilter;
// MMX-specific dither/filter scratch tables (updated per output line below).
2331 int32_t *lumMmxFilter= c->lumMmxFilter;
2332 int32_t *chrMmxFilter= c->chrMmxFilter;
2333 int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2334 const int vLumFilterSize= c->vLumFilterSize;
2335 const int vChrFilterSize= c->vChrFilterSize;
2336 const int hLumFilterSize= c->hLumFilterSize;
2337 const int hChrFilterSize= c->hChrFilterSize;
// Ring buffers of horizontally scaled lines; vLumBufSize/vChrBufSize entries each.
2338 int16_t **lumPixBuf= c->lumPixBuf;
2339 int16_t **chrUPixBuf= c->chrUPixBuf;
2340 int16_t **chrVPixBuf= c->chrVPixBuf;
2341 int16_t **alpPixBuf= c->alpPixBuf;
2342 const int vLumBufSize= c->vLumBufSize;
2343 const int vChrBufSize= c->vChrBufSize;
2344 uint8_t *formatConvBuffer= c->formatConvBuffer;
// Chroma slice geometry: slice start/height scaled down by the vertical
// chroma subsampling factor (the -((-x)>>s) form rounds the height up).
2345 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2346 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2348 uint32_t *pal=c->pal_yuv;
// Output function pointers selected by find_c_packed_planar_out_funcs()
// (or arch-specific overrides) at init time.
2349 yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
2350 yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
2351 yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2352 yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2353 yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2355 /* vars which will change and which we need to store back in the context */
2357 int lumBufIndex= c->lumBufIndex;
2358 int chrBufIndex= c->chrBufIndex;
2359 int lastInLumBuf= c->lastInLumBuf;
2360 int lastInChrBuf= c->lastInChrBuf;
// Packed input has a single plane; replicate plane 0's stride so the
// generic per-plane code below works uniformly.
2362 if (isPacked(c->srcFormat)) {
2370 srcStride[3]= srcStride[0];
// vChrDrop skips chroma lines by doubling the chroma stride.
2372 srcStride[1]<<= c->vChrDrop;
2373 srcStride[2]<<= c->vChrDrop;
2375 DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2376 src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2377 dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2378 DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2379 srcSliceY, srcSliceH, dstY, dstH);
2380 DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2381 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
// Warn once if output strides break 8-byte alignment (slower unaligned paths).
2383 if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2384 static int warnedAlready=0; //FIXME move this into the context perhaps
2385 if (flags & SWS_PRINT_INFO && !warnedAlready) {
2386 av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2387 " ->cannot do aligned memory accesses anymore\n");
2392 /* Note the user might start scaling the picture in the middle so this
2393 will not get executed. This is not really intended but works
2394 currently, so people might do it. */
// Reset ring-buffer state at the start of a new picture.
2395 if (srcSliceY ==0) {
// Main loop: one iteration per destination line (luma resolution).
2405 for (;dstY < dstH; dstY++) {
2406 const int chrDstY= dstY>>c->chrDstVSubSample;
// Destination pointers for this output line; alpha only if an alpha
// ring buffer exists.
2407 uint8_t *dest[4] = {
2408 dst[0] + dstStride[0] * dstY,
2409 dst[1] + dstStride[1] * chrDstY,
2410 dst[2] + dstStride[2] * chrDstY,
2411 (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
// Range of source lines the vertical filters need for this output line.
// firstLumSrcY2/lastLumSrcY2 cover the last luma line of the current
// chroma group so a whole chroma block's inputs are available at once.
2414 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2415 const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2416 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2417 int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2418 int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
2419 int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2422 //handle holes (FAST_BILINEAR & weird filters)
2423 if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2424 if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2425 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2426 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2428 DEBUG_BUFFERS("dstY: %d\n", dstY);
2429 DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2430 firstLumSrcY, lastLumSrcY, lastInLumBuf);
2431 DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2432 firstChrSrcY, lastChrSrcY, lastInChrBuf);
2434 // Do we have enough lines in this slice to output the dstY line
2435 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
// Not enough input yet: clamp to what the slice provides, horizontally
// scale it into the ring buffers, then break out and wait for more.
2437 if (!enough_lines) {
2438 lastLumSrcY = srcSliceY + srcSliceH - 1;
2439 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2440 DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2441 lastLumSrcY, lastChrSrcY);
2444 //Do horizontal scaling
// Luma (and alpha, which shares luma geometry): scale every still-missing
// source line into the next ring-buffer slot.
2445 while(lastInLumBuf < lastLumSrcY) {
2446 const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2447 const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2449 assert(lumBufIndex < 2*vLumBufSize);
2450 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2451 assert(lastInLumBuf + 1 - srcSliceY >= 0);
2452 hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2453 hLumFilter, hLumFilterPos, hLumFilterSize,
2456 if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2457 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
2458 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2462 DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2463 lumBufIndex, lastInLumBuf);
// Chroma: same ring-buffer fill at chroma resolution.
2465 while(lastInChrBuf < lastChrSrcY) {
2466 const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
2467 const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
2469 assert(chrBufIndex < 2*vChrBufSize);
2470 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2471 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2472 //FIXME replace parameters through context struct (some at least)
// needs_hcscale is 0 for gray/mono destinations, where chroma is unused.
2474 if (c->needs_hcscale)
2475 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2476 chrDstW, src1, src2, chrSrcW, chrXInc,
2477 hChrFilter, hChrFilterPos, hChrFilterSize,
2478 formatConvBuffer, pal);
2480 DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2481 chrBufIndex, lastInChrBuf);
2483 //wrap buf index around to stay inside the ring buffer
2484 if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2485 if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2487 break; //we can't output a dstY line so let's try with the next slice
// Refresh MMX dither/coefficient tables for this output line.
2490 updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2492 if (dstY >= dstH-2) {
2493 // hmm looks like we can't use MMX here without overwriting this array's tail
2494 find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
2495 &yuv2packed1, &yuv2packed2,
// Build windows into the ring buffers covering the filter taps for this
// output line (the +vLumBufSize/+vChrBufSize offset lands inside the
// doubled pointer arrays so wrap-around needs no modulo).
2500 const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2501 const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2502 const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2503 const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
// Planar YUV / gray output path.
2504 if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2505 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
// Skip chroma output on lines that have no chroma row (subsampling)
// or when the destination is gray.
2506 if ((dstY&chrSkipMask) || isGray(dstFormat))
2507 dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
2508 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
2509 const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
2510 yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
2511 dest, dstW, chrDstW);
2512 } else { //General YV12
2513 yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
2514 lumSrcPtr, vLumFilterSize,
2515 vChrFilter + chrDstY * vChrFilterSize,
2516 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2517 alpSrcPtr, dest, dstW, chrDstW);
// Packed output path (RGB/packed-YUV): pick the cheapest variant the
// vertical filter sizes allow.
2520 assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2521 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2522 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2523 int chrAlpha = vChrFilter[2 * dstY + 1];
2524 yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2525 alpPixBuf ? *alpSrcPtr : NULL,
2526 dest[0], dstW, chrAlpha, dstY);
2527 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2528 int lumAlpha = vLumFilter[2 * dstY + 1];
2529 int chrAlpha = vChrFilter[2 * dstY + 1];
// 0x10001 replicates the 16-bit coefficient into both halves of the
// 32-bit MMX filter word.
2531 lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2533 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2534 yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2535 alpPixBuf ? alpSrcPtr : NULL,
2536 dest[0], dstW, lumAlpha, chrAlpha, dstY);
2537 } else { //general RGB
2538 yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2539 lumSrcPtr, vLumFilterSize,
2540 vChrFilter + dstY * vChrFilterSize,
2541 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2542 alpSrcPtr, dest[0], dstW, dstY);
// Destination has an alpha plane but the source provided none:
// fill it with fully opaque (255).
2548 if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2549 fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
// Flush pending non-temporal stores issued by the MMX2 code paths.
2552 if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2553 __asm__ volatile("sfence":::"memory");
2557 /* store changed local vars back in the context */
2559 c->lumBufIndex= lumBufIndex;
2560 c->chrBufIndex= chrBufIndex;
2561 c->lastInLumBuf= lastInLumBuf;
2562 c->lastInChrBuf= lastInChrBuf;
// Number of destination lines written by this call.
2564 return dstY - lastDstY;
2567 static av_cold void sws_init_swScale_c(SwsContext *c)
/* One-time (av_cold) initialization of the C function-pointer tables in the
 * context: output functions, per-format input unpackers (chrToYV12 /
 * lumToYV12 / alpToYV12), horizontal scalers, and range-conversion helpers.
 * All selections depend only on c->srcFormat / c->dstFormat / c->flags. */
2569 enum PixelFormat srcFormat = c->srcFormat;
// Select the C output functions for the destination format.
2571 find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
2572 &c->yuv2packed1, &c->yuv2packed2,
// --- chroma input unpacker: source format -> planar U/V at YV12 layout ---
2575 c->chrToYV12 = NULL;
2577 case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2578 case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2579 case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2580 case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
// Palette-based formats go through the palette lookup.
2584 case PIX_FMT_BGR4_BYTE:
2585 case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
// >8-bit planar YUV in the non-native byte order is byte-swapped on load.
2587 case PIX_FMT_YUV444P9LE:
2588 case PIX_FMT_YUV420P9LE:
2589 case PIX_FMT_YUV422P10LE:
2590 case PIX_FMT_YUV444P10LE:
2591 case PIX_FMT_YUV420P10LE:
2592 case PIX_FMT_YUV420P16LE:
2593 case PIX_FMT_YUV422P16LE:
2594 case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2596 case PIX_FMT_YUV444P9BE:
2597 case PIX_FMT_YUV420P9BE:
2598 case PIX_FMT_YUV444P10BE:
2599 case PIX_FMT_YUV422P10BE:
2600 case PIX_FMT_YUV420P10BE:
2601 case PIX_FMT_YUV420P16BE:
2602 case PIX_FMT_YUV422P16BE:
2603 case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
// RGB-family chroma conversion: when horizontal chroma subsampling is
// active, use the *_half variants which average two source pixels.
// Note the deliberate naming cross-over: PIX_FMT_RGB32 memory layout is
// handled by the bgr32* readers and vice versa.
2606 if (c->chrSrcHSubSample) {
2608 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2609 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2610 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2611 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2612 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2613 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2614 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2615 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2616 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2617 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2618 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2619 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2620 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2621 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2622 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2623 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2624 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2625 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
// No horizontal subsampling: one UV sample per source pixel.
2629 case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2630 case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2631 case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2632 case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2633 case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2634 case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2635 case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2636 case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2637 case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2638 case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2639 case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2640 case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2641 case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2642 case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2643 case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2644 case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2645 case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2646 case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
// --- luma (and alpha) input unpackers ---
2650 c->lumToYV12 = NULL;
2651 c->alpToYV12 = NULL;
2652 switch (srcFormat) {
// High-bit-depth planar / gray16 in foreign byte order: byte swap.
2654 case PIX_FMT_YUV444P9LE:
2655 case PIX_FMT_YUV420P9LE:
2656 case PIX_FMT_YUV444P10LE:
2657 case PIX_FMT_YUV422P10LE:
2658 case PIX_FMT_YUV420P10LE:
2659 case PIX_FMT_YUV420P16LE:
2660 case PIX_FMT_YUV422P16LE:
2661 case PIX_FMT_YUV444P16LE:
2662 case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2664 case PIX_FMT_YUV444P9BE:
2665 case PIX_FMT_YUV420P9BE:
2666 case PIX_FMT_YUV444P10BE:
2667 case PIX_FMT_YUV420P10BE:
2668 case PIX_FMT_YUV420P10BE:
2669 case PIX_FMT_YUV420P16BE:
2670 case PIX_FMT_YUV422P16BE:
2671 case PIX_FMT_YUV444P16BE:
2672 case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
// Y400A shares YUYV's every-other-byte luma layout, hence yuy2ToY_c.
2674 case PIX_FMT_YUYV422 :
2675 case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2676 case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2677 case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2678 case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2679 case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2680 case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2681 case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2682 case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2683 case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2684 case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2685 case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2686 case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2690 case PIX_FMT_BGR4_BYTE:
2691 case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2692 case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2693 case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2694 case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2695 case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2696 case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2697 case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2698 case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2699 case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2700 case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2701 case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
// Alpha extraction for formats that carry an alpha channel.
2704 switch (srcFormat) {
2706 case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2708 case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
// Y400A alpha sits at odd byte offsets, same layout uyvyToY_c reads.
2709 case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
// --- horizontal scaler + range conversion, by processing bit depth ---
2713 if (c->scalingBpp == 8) {
2714 c->hScale = hScale_c;
2715 if (c->flags & SWS_FAST_BILINEAR) {
2716 c->hyscale_fast = hyscale_fast_c;
2717 c->hcscale_fast = hcscale_fast_c;
// Full/limited range conversion only applies to YUV destinations.
2720 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2722 c->lumConvertRange = lumRangeFromJpeg_c;
2723 c->chrConvertRange = chrRangeFromJpeg_c;
2725 c->lumConvertRange = lumRangeToJpeg_c;
2726 c->chrConvertRange = chrRangeToJpeg_c;
// 16-bit processing path: wider-precision scaler and range converters.
2730 c->hScale = hScale16_c;
2731 c->scale19To15Fw = scale19To15Fw_c;
2732 c->scale8To16Rv = scale8To16Rv_c;
2734 if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2736 c->lumConvertRange = lumRangeFromJpeg16_c;
2737 c->chrConvertRange = chrRangeFromJpeg16_c;
2739 c->lumConvertRange = lumRangeToJpeg16_c;
2740 c->chrConvertRange = chrRangeToJpeg16_c;
// Chroma horizontal scaling is skipped for gray/monochrome sources and
// destinations, where no chroma exists.
2745 if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2746 srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2747 c->needs_hcscale = 1;
2750 SwsFunc ff_getSwsFunc(SwsContext *c)
2752 sws_init_swScale_c(c);
2755 ff_sws_init_swScale_mmx(c);
2757 ff_sws_init_swScale_altivec(c);