From f469094c9b166e0fe45c7d52c99edf9f98c49c98 Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Thu, 21 Sep 2006 17:42:23 +0000 Subject: [PATCH] tweak ff_imdct_calc_3dn2 Originally committed as revision 6312 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/i386/fft_3dn2.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/libavcodec/i386/fft_3dn2.c b/libavcodec/i386/fft_3dn2.c index 5f066d175a..90f2b9f3c1 100644 --- a/libavcodec/i386/fft_3dn2.c +++ b/libavcodec/i386/fft_3dn2.c @@ -177,15 +177,13 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, ); } - z += n8; + k = n-8; asm volatile("movd %0, %%mm7" ::"r"(1<<31)); - for(k = 0; k < n8; k++) { - asm volatile( - "movq %0, %%mm0 \n\t" - "pswapd %1, %%mm1 \n\t" - ::"m"(z[k]), "m"(z[-1-k]) - ); - asm volatile( + asm volatile( + "1: \n\t" + "movq (%4,%0), %%mm0 \n\t" // z[n8+k] + "neg %0 \n\t" + "pswapd -8(%4,%0), %%mm1 \n\t" // z[n8-1-k] "movq %%mm0, %%mm2 \n\t" "pxor %%mm7, %%mm2 \n\t" "punpckldq %%mm1, %%mm2 \n\t" @@ -194,15 +192,17 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, "pswapd %%mm0, %%mm4 \n\t" "pxor %%mm7, %%mm0 \n\t" "pxor %%mm7, %%mm4 \n\t" - "movq %%mm0, %0 \n\t" // { -z[n8+k].im, z[n8-1-k].re } - "movq %%mm4, %1 \n\t" // { -z[n8-1-k].re, z[n8+k].im } - "movq %%mm2, %2 \n\t" // { -z[n8+k].re, z[n8-1-k].im } - "movq %%mm3, %3 \n\t" // { z[n8-1-k].im, -z[n8+k].re } - :"=m"(output[2*k]), "=m"(output[n2-2-2*k]), - "=m"(output[n2+2*k]), "=m"(output[n-2-2*k]) - ::"memory" - ); - } + "movq %%mm3, -8(%3,%0) \n\t" // output[n-2-2*k] = { z[n8-1-k].im, -z[n8+k].re } + "movq %%mm4, -8(%2,%0) \n\t" // output[n2-2-2*k]= { -z[n8-1-k].re, z[n8+k].im } + "neg %0 \n\t" + "movq %%mm0, (%1,%0) \n\t" // output[2*k] = { -z[n8+k].im, z[n8-1-k].re } + "movq %%mm2, (%2,%0) \n\t" // output[n2+2*k] = { -z[n8+k].re, z[n8-1-k].im } + "sub $8, %0 \n\t" + "jge 1b \n\t" + :"+r"(k) + :"r"(output), "r"(output+n2), "r"(output+n), "r"(z+n8) + :"memory" + ); asm volatile("femms"); } -- 2.11.0