static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
{
-#if HAVE_MMX
- x86_reg
-#else
- long
-#endif
- idx = 15 - src_size;
+ x86_reg idx = 15 - src_size;
const uint8_t *s = src-idx;
uint8_t *d = dst-idx;
#if HAVE_MMX
long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
{
long y;
- const long chromWidth= width>>1;
+ const x86_reg chromWidth= width>>1;
for (y=0; y<height; y++)
{
#if HAVE_MMX
- x86_reg cw = chromWidth;
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
"add $8, %%"REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
- ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (cw)
+ ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
: "%"REG_a
);
#else
long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
{
long y;
- const long chromWidth= width>>1;
+ const x86_reg chromWidth= width>>1;
for (y=0; y<height; y++)
{
#if HAVE_MMX
- x86_reg cw = chromWidth;
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
"add $8, %%"REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
- ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (cw)
+ ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
: "%"REG_a
);
#else
long lumStride, long chromStride, long srcStride)
{
long y;
- const long chromWidth= width>>1;
+ const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2)
{
#if HAVE_MMX
- x86_reg cw = chromWidth;
__asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"add $8, %%"REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
- ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (cw)
+ ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a
);
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
- ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (cw)
+ ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a
);
#else
);
#else
- const long mmxSize=1;
+ const x86_reg mmxSize=1;
#endif
dst[0 ]= (3*src[0] + src[srcStride])>>2;
dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
long lumStride, long chromStride, long srcStride)
{
long y;
- const long chromWidth= width>>1;
+ const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2)
{
#if HAVE_MMX
- x86_reg cw = chromWidth;
__asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"add $8, %%"REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
- ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (cw)
+ ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a
);
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
- ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (cw)
+ ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a
);
#else
long lumStride, long chromStride, long srcStride)
{
long y;
- const long chromWidth= width>>1;
+ const x86_reg chromWidth= width>>1;
#if HAVE_MMX
- x86_reg cw = chromWidth;
for (y=0; y<height-2; y+=2)
{
long i;
"movd %%mm0, (%3, %%"REG_a") \n\t"
"add $4, %%"REG_a" \n\t"
" js 1b \n\t"
- : : "r" (src+cw*6), "r" (src+srcStride+cw*6), "r" (udst+cw), "r" (vdst+cw), "g" (-cw)
+ : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
: "%"REG_a, "%"REG_d
);
long srcStride1, long srcStride2,
long dstStride1, long dstStride2)
{
- long x,y,w,h;
+ x86_reg y;
+ long x,w,h;
w=width/2; h=height/2;
#if HAVE_MMX
__asm__ volatile(
long srcStride1, long srcStride2,
long srcStride3, long dstStride)
{
+ x86_reg x;
long y,w,h;
w=width/2; h=height;
for (y=0;y<h;y++){
const uint8_t* up=src2+srcStride2*(y>>2);
const uint8_t* vp=src3+srcStride3*(y>>2);
uint8_t* d=dst+dstStride*y;
+ x=0;
#if HAVE_MMX
- x86_reg x = 0;
for (;x<w-7;x+=8)
{
__asm__ volatile(
: "r"(yp), "r" (up), "r"(vp), "r"(d)
:"memory");
}
-#else
- long x = 0;
#endif
for (; x<w; x++)
{