From ca844b7be9c69c91113094ef21d720f1ca80db60 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 1 Aug 2012 15:31:43 +0200 Subject: [PATCH] x86: Use consistent 3dnowext function and macro name suffixes Currently there is a wild mix of 3dn2/3dnow2/3dnowext. Switching to "3dnowext", which is a more common name of the CPU flag, as reported e.g. by the Linux kernel, unifies this. --- libavcodec/x86/dsputil_mmx.c | 14 ++++++------- libavcodec/x86/fft.c | 6 +++--- libavcodec/x86/fft.h | 6 +++--- libavcodec/x86/fft_mmx.asm | 46 ++++++++++++++++++++--------------------- libavcodec/x86/fmtconvert.asm | 6 +++--- libavcodec/x86/fmtconvert_mmx.c | 10 +++++---- libavutil/x86/x86inc.asm | 2 +- 7 files changed, 46 insertions(+), 44 deletions(-) diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 827705c003..d26f6126a8 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2358,9 +2358,9 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], } #if HAVE_6REGS -static void vector_fmul_window_3dnow2(float *dst, const float *src0, - const float *src1, const float *win, - int len) +static void vector_fmul_window_3dnowext(float *dst, const float *src0, + const float *src1, const float *win, + int len) { x86_reg i = -len * 4; x86_reg j = len * 4 - 8; @@ -2809,11 +2809,11 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, #endif } -static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx, - int mm_flags) +static void dsputil_init_3dnowext(DSPContext *c, AVCodecContext *avctx, + int mm_flags) { #if HAVE_6REGS && HAVE_INLINE_ASM - c->vector_fmul_window = vector_fmul_window_3dnow2; + c->vector_fmul_window = vector_fmul_window_3dnowext; #endif } @@ -3051,7 +3051,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) dsputil_init_3dnow(c, avctx, mm_flags); if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) - dsputil_init_3dnow2(c, avctx, mm_flags); + dsputil_init_3dnowext(c, avctx, mm_flags); if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) dsputil_init_sse(c, avctx, mm_flags); diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c index f1c1c9d36b..fcde3fa797 100644 --- a/libavcodec/x86/fft.c +++ b/libavcodec/x86/fft.c @@ -34,9 +34,9 @@ av_cold void ff_fft_init_mmx(FFTContext *s) } if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) { /* 3DNowEx for K7 */ - s->imdct_calc = ff_imdct_calc_3dnow2; - s->imdct_half = ff_imdct_half_3dnow2; - s->fft_calc = ff_fft_calc_3dnow2; + s->imdct_calc = ff_imdct_calc_3dnowext; + s->imdct_half = ff_imdct_half_3dnowext; + s->fft_calc = ff_fft_calc_3dnowext; } #endif if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h index 1cefe7a9ee..6e80b95d11 100644 --- a/libavcodec/x86/fft.h +++ b/libavcodec/x86/fft.h @@ -25,12 +25,12 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); void ff_fft_calc_avx(FFTContext *s, FFTComplex *z); void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z); -void ff_fft_calc_3dnow2(FFTContext *s, FFTComplex *z); +void ff_fft_calc_3dnowext(FFTContext *s, FFTComplex *z); void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_calc_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm index ac53296f70..7c0e9de311 100644 --- a/libavcodec/x86/fft_mmx.asm +++ b/libavcodec/x86/fft_mmx.asm @@ -93,14 +93,14 @@ cextern cos_ %+ i SECTION_TEXT -%macro T2_3DN 4 ; z0, z1, mem0, mem1 +%macro T2_3DNOW 4 ; z0, z1, mem0, mem1 mova %1, %3 mova %2, %1 pfadd %1, %4 pfsub %2, %4 %endmacro -%macro T4_3DN 6 ; z0, z1, z2, z3, tmp0, tmp1 +%macro T4_3DNOW 6 ; z0, z1, z2, z3, tmp0, tmp1 mova %5, %3 pfsub %3, %4 pfadd %5, %4 ; {t6,t5} @@ -444,13 +444,13 @@ fft16_sse: ret -%macro FFT48_3DN 0 +%macro FFT48_3DNOW 0 align 16 fft4 %+ SUFFIX: - T2_3DN m0, m1, Z(0), Z(1) + T2_3DNOW m0, m1, Z(0), Z(1) mova m2, Z(2) mova m3, Z(3) - T4_3DN m0, m1, m2, m3, m4, m5 + T4_3DNOW m0, m1, m2, m3, m4, m5 PUNPCK m0, m1, m4 PUNPCK m2, m3, m5 mova Z(0), m0 @@ -461,14 +461,14 @@ fft4 %+ SUFFIX: align 16 fft8 %+ SUFFIX: - T2_3DN m0, m1, Z(0), Z(1) + T2_3DNOW m0, m1, Z(0), Z(1) mova m2, Z(2) mova m3, Z(3) - T4_3DN m0, m1, m2, m3, m4, m5 + T4_3DNOW m0, m1, m2, m3, m4, m5 mova Z(0), m0 mova Z(2), m2 - T2_3DN m4, m5, Z(4), Z(5) - T2_3DN m6, m7, Z2(6), Z2(7) + T2_3DNOW m4, m5, Z(4), Z(5) + T2_3DNOW m6, m7, Z2(6), Z2(7) PSWAPD m0, m5 PSWAPD m2, m7 pxor m0, [ps_m1p1] @@ -477,12 +477,12 @@ fft8 %+ SUFFIX: pfadd m7, m2 pfmul m5, [ps_root2] pfmul m7, [ps_root2] - T4_3DN m1, m3, m5, m7, m0, m2 + T4_3DNOW m1, m3, m5, m7, m0, m2 mova Z(5), m5 mova Z2(7), m7 mova m0, Z(0) mova m2, Z(2) - T4_3DN m0, m2, m4, m6, m5, m7 + T4_3DNOW m0, m2, m4, m6, m5, m7 PUNPCK m0, m1, m5 PUNPCK m2, m3, m7 mova Z(0), m0 @@ -500,7 +500,7 @@ fft8 %+ SUFFIX: %if ARCH_X86_32 %macro PSWAPD 2 -%if cpuflag(3dnow2) +%if cpuflag(3dnowext) pswapd %1, %2 %elifidn %1, %2 movd [r0+12], %1 @@ -512,11 +512,11 @@ fft8 %+ SUFFIX: %endif %endmacro -INIT_MMX 3dnow2 -FFT48_3DN +INIT_MMX 3dnowext +FFT48_3DNOW INIT_MMX 3dnow -FFT48_3DN +FFT48_3DNOW %endif %define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)] @@ -633,7 +633,7 @@ cglobal fft_calc, 2,5,8 %if ARCH_X86_32 INIT_MMX 3dnow FFT_CALC_FUNC -INIT_MMX 3dnow2 +INIT_MMX 3dnowext FFT_CALC_FUNC %endif INIT_XMM sse @@ -727,7 +727,7 @@ cglobal imdct_calc, 3,5,3 %if ARCH_X86_32 INIT_MMX 3dnow IMDCT_CALC_FUNC -INIT_MMX 3dnow2 +INIT_MMX 3dnowext IMDCT_CALC_FUNC %endif @@ -743,8 +743,8 @@ INIT_MMX 3dnow %define unpckhps punpckhdq DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q] DECL_PASS pass_interleave_3dnow, PASS_BIG 0 -%define pass_3dnow2 pass_3dnow -%define pass_interleave_3dnow2 pass_interleave_3dnow +%define pass_3dnowext pass_3dnow +%define pass_interleave_3dnowext pass_interleave_3dnow %endif %ifdef PIC @@ -813,7 +813,7 @@ DECL_FFT 5, _interleave INIT_MMX 3dnow DECL_FFT 4 DECL_FFT 4, _interleave -INIT_MMX 3dnow2 +INIT_MMX 3dnowext DECL_FFT 4 DECL_FFT 4, _interleave %endif @@ -845,7 +845,7 @@ INIT_XMM sse PSWAPD m5, m3 pfmul m2, m3 pfmul m6, m5 -%if cpuflag(3dnow2) +%if cpuflag(3dnowext) pfpnacc m0, m4 pfpnacc m2, m6 %else @@ -1018,7 +1018,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i xor r4, r4 sub r4, r3 %endif -%if notcpuflag(3dnow2) && mmsize == 8 +%if notcpuflag(3dnowext) && mmsize == 8 movd m7, [ps_m1m1m1m1] %endif .pre: @@ -1102,7 +1102,7 @@ DECL_IMDCT POSROTATESHUF INIT_MMX 3dnow DECL_IMDCT POSROTATESHUF_3DNOW -INIT_MMX 3dnow2 +INIT_MMX 3dnowext DECL_IMDCT POSROTATESHUF_3DNOW %endif diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index 4916e7af33..0fd14fefa3 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -249,7 +249,7 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2 %macro PSWAPD_SSE 2 pshufw %1, %2, 0x4e %endmacro -%macro PSWAPD_3DN1 2 +%macro PSWAPD_3DNOW 2 movq %1, %2 psrlq %1, 32 punpckldq %1, %2 @@ -306,10 +306,10 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, %define pswapd PSWAPD_SSE FLOAT_TO_INT16_INTERLEAVE6 sse %define cvtps2pi pf2id -%define pswapd PSWAPD_3DN1 +%define pswapd PSWAPD_3DNOW FLOAT_TO_INT16_INTERLEAVE6 3dnow %undef pswapd -FLOAT_TO_INT16_INTERLEAVE6 3dn2 +FLOAT_TO_INT16_INTERLEAVE6 3dnowext %undef cvtps2pi ;----------------------------------------------------------------------------- diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c index aaf634d37f..fbdc5262b9 100644 --- a/libavcodec/x86/fmtconvert_mmx.c +++ b/libavcodec/x86/fmtconvert_mmx.c @@ -46,7 +46,7 @@ void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long l void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); -void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); +void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len); #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse @@ -74,9 +74,11 @@ FLOAT_TO_INT16_INTERLEAVE(3dnow) FLOAT_TO_INT16_INTERLEAVE(sse) FLOAT_TO_INT16_INTERLEAVE(sse2) -static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ +static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src, + long len, int channels) +{ if(channels==6) - ff_float_to_int16_interleave6_3dn2(dst, src, len); + ff_float_to_int16_interleave6_3dnowext(dst, src, len); else float_to_int16_interleave_3dnow(dst, src, len, channels); } @@ -126,7 +128,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) } if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - c->float_to_int16_interleave = float_to_int16_interleave_3dn2; + c->float_to_int16_interleave = float_to_int16_interleave_3dnowext; } } if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 7a75951cf6..03e6c0721b 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -557,7 +557,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %assign cpuflags_mmx (1<<0) %assign cpuflags_mmx2 (1<<1) | cpuflags_mmx %assign cpuflags_3dnow (1<<2) | cpuflags_mmx -%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow +%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow %assign cpuflags_sse (1<<4) | cpuflags_mmx2 %assign cpuflags_sse2 (1<<5) | cpuflags_sse %assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 -- 2.11.0