From 28748a912887bb1799f5fafb587a03e051f11201 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Fri, 11 Jan 2008 08:29:58 +0000 Subject: [PATCH] Factorize some duplicated code from CAVS and H.264 into a common file. patch by Christophe Gisquet, christophe.gisquet free fr Originally committed as revision 11504 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/i386/cavsdsp_mmx.c | 16 ------------ libavcodec/i386/dsputil_mmx.c | 51 -------------------------------------- libavcodec/i386/dsputil_mmx.h | 57 +++++++++++++++++++++++++++++++++++++++++++ libavcodec/i386/h264dsp_mmx.c | 6 ----- 4 files changed, 57 insertions(+), 73 deletions(-) diff --git a/libavcodec/i386/cavsdsp_mmx.c b/libavcodec/i386/cavsdsp_mmx.c index 4a23d0bc6..141382fb0 100644 --- a/libavcodec/i386/cavsdsp_mmx.c +++ b/libavcodec/i386/cavsdsp_mmx.c @@ -32,22 +32,6 @@ * ****************************************************************************/ -#define SUMSUB_BA( a, b ) \ - "paddw "#b", "#a" \n\t"\ - "paddw "#b", "#b" \n\t"\ - "psubw "#a", "#b" \n\t" - -#define SBUTTERFLY(a,b,t,n)\ - "movq " #a ", " #t " \n\t" /* abcd */\ - "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ - "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */ - -#define TRANSPOSE4(a,b,c,d,t)\ - SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\ - SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\ - SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\ - SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */ - static inline void cavs_idct8_1d(int16_t *block, uint64_t bias) { asm volatile( diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 8328c2811..9b787291c 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -203,17 +203,6 @@ DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 }; #undef DEF #undef PAVGB -#define SBUTTERFLY(a,b,t,n,m)\ - "mov" #m " " #a ", " #t " \n\t" /* abcd */\ - "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ - "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ - -#define TRANSPOSE4(a,b,c,d,t)\ - SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\ - SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\ - SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\ - SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */ - /***********************************/ /* standard MMX */ @@ -1545,46 +1534,6 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t #define DIFF_PIXELS_4x8(p1,p2,stride,temp) DIFF_PIXELS_8(d, q, %%mm, p1, p2, stride, temp) #define DIFF_PIXELS_8x8(p1,p2,stride,temp) DIFF_PIXELS_8(q, dqa, %%xmm, p1, p2, stride, temp) -#ifdef ARCH_X86_64 -// permutes 01234567 -> 05736421 -#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\ - SBUTTERFLY(a,b,%%xmm8,wd,dqa)\ - SBUTTERFLY(c,d,b,wd,dqa)\ - SBUTTERFLY(e,f,d,wd,dqa)\ - SBUTTERFLY(g,h,f,wd,dqa)\ - SBUTTERFLY(a,c,h,dq,dqa)\ - SBUTTERFLY(%%xmm8,b,c,dq,dqa)\ - SBUTTERFLY(e,g,b,dq,dqa)\ - SBUTTERFLY(d,f,g,dq,dqa)\ - SBUTTERFLY(a,e,f,qdq,dqa)\ - SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\ - SBUTTERFLY(h,b,d,qdq,dqa)\ - SBUTTERFLY(c,g,b,qdq,dqa)\ - "movdqa %%xmm8, "#g" \n\t" -#else -#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\ - "movdqa "#h", "#t" \n\t"\ - SBUTTERFLY(a,b,h,wd,dqa)\ - "movdqa "#h", 16"#t" \n\t"\ - "movdqa "#t", "#h" \n\t"\ - SBUTTERFLY(c,d,b,wd,dqa)\ - SBUTTERFLY(e,f,d,wd,dqa)\ - SBUTTERFLY(g,h,f,wd,dqa)\ - SBUTTERFLY(a,c,h,dq,dqa)\ - "movdqa "#h", "#t" \n\t"\ - "movdqa 16"#t", "#h" \n\t"\ - SBUTTERFLY(h,b,c,dq,dqa)\ - SBUTTERFLY(e,g,b,dq,dqa)\ - SBUTTERFLY(d,f,g,dq,dqa)\ - SBUTTERFLY(a,e,f,qdq,dqa)\ - SBUTTERFLY(h,d,e,qdq,dqa)\ - "movdqa "#h", 16"#t" \n\t"\ - "movdqa "#t", "#h" \n\t"\ - SBUTTERFLY(h,b,d,qdq,dqa)\ - SBUTTERFLY(c,g,b,qdq,dqa)\ - "movdqa 16"#t", "#g" \n\t" -#endif - #define LBUTTERFLY2(a1,b1,a2,b2)\ "paddw " #b1 ", " #a1 " \n\t"\ "paddw " #b2 ", " #a2 " \n\t"\ diff --git a/libavcodec/i386/dsputil_mmx.h b/libavcodec/i386/dsputil_mmx.h index fc5d01e70..a1571fc1e 100644 --- a/libavcodec/i386/dsputil_mmx.h +++ b/libavcodec/i386/dsputil_mmx.h @@ -52,4 +52,61 @@ extern const uint64_t ff_pb_FC; extern const double ff_pd_1[2]; extern const double ff_pd_2[2]; +/* in/out: mma=mma+mmb, mmb=mmb-mma */ +#define SUMSUB_BA( a, b ) \ + "paddw "#b", "#a" \n\t"\ + "paddw "#b", "#b" \n\t"\ + "psubw "#a", "#b" \n\t" + +#define SBUTTERFLY(a,b,t,n,m)\ + "mov" #m " " #a ", " #t " \n\t" /* abcd */\ + "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ + "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ + +#define TRANSPOSE4(a,b,c,d,t)\ + SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\ + SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\ + SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\ + SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */ + +#ifdef ARCH_X86_64 +// permutes 01234567 -> 05736421 +#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\ + SBUTTERFLY(a,b,%%xmm8,wd,dqa)\ + SBUTTERFLY(c,d,b,wd,dqa)\ + SBUTTERFLY(e,f,d,wd,dqa)\ + SBUTTERFLY(g,h,f,wd,dqa)\ + SBUTTERFLY(a,c,h,dq,dqa)\ + SBUTTERFLY(%%xmm8,b,c,dq,dqa)\ + SBUTTERFLY(e,g,b,dq,dqa)\ + SBUTTERFLY(d,f,g,dq,dqa)\ + SBUTTERFLY(a,e,f,qdq,dqa)\ + SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\ + SBUTTERFLY(h,b,d,qdq,dqa)\ + SBUTTERFLY(c,g,b,qdq,dqa)\ + "movdqa %%xmm8, "#g" \n\t" +#else +#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\ + "movdqa "#h", "#t" \n\t"\ + SBUTTERFLY(a,b,h,wd,dqa)\ + "movdqa "#h", 16"#t" \n\t"\ + "movdqa "#t", "#h" \n\t"\ + SBUTTERFLY(c,d,b,wd,dqa)\ + SBUTTERFLY(e,f,d,wd,dqa)\ + SBUTTERFLY(g,h,f,wd,dqa)\ + SBUTTERFLY(a,c,h,dq,dqa)\ + "movdqa "#h", "#t" \n\t"\ + "movdqa 16"#t", "#h" \n\t"\ + SBUTTERFLY(h,b,c,dq,dqa)\ + SBUTTERFLY(e,g,b,dq,dqa)\ + SBUTTERFLY(d,f,g,dq,dqa)\ + SBUTTERFLY(a,e,f,qdq,dqa)\ + SBUTTERFLY(h,d,e,qdq,dqa)\ + "movdqa "#h", 16"#t" \n\t"\ + "movdqa "#t", "#h" \n\t"\ + SBUTTERFLY(h,b,d,qdq,dqa)\ + SBUTTERFLY(c,g,b,qdq,dqa)\ + "movdqa 16"#t", "#g" \n\t" +#endif + #endif /* FFMPEG_DSPUTIL_MMX_H */ diff --git a/libavcodec/i386/h264dsp_mmx.c b/libavcodec/i386/h264dsp_mmx.c index 14511db1e..1ecb0cbef 100644 --- a/libavcodec/i386/h264dsp_mmx.c +++ b/libavcodec/i386/h264dsp_mmx.c @@ -23,12 +23,6 @@ /***********************************/ /* IDCT */ -/* in/out: mma=mma+mmb, mmb=mmb-mma */ -#define SUMSUB_BA( a, b ) \ - "paddw "#b", "#a" \n\t"\ - "paddw "#b", "#b" \n\t"\ - "psubw "#a", "#b" \n\t" - #define SUMSUB_BADC( a, b, c, d ) \ "paddw "#b", "#a" \n\t"\ "paddw "#d", "#c" \n\t"\ -- 2.11.0