From 7087ce08c84dd20404ba258096530cc547d25c15 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Sat, 26 Mar 2011 15:20:30 +0000 Subject: [PATCH] Fixed-point FFT and MDCT --- libavcodec/Makefile | 10 ++++--- libavcodec/costablegen.c | 26 +++++++++++++++--- libavcodec/fft-internal.h | 67 +++++++++++++++++++++++++++++++++++++++++++++++ libavcodec/fft.c | 53 +++++++++++++++++++++---------------- libavcodec/fft.h | 41 +++++++++++++++++++++++++---- libavcodec/fft_fixed.c | 20 ++++++++++++++ libavcodec/fft_float.c | 20 ++++++++++++++ libavcodec/mdct.c | 32 ++++++++++------------ libavcodec/mdct_fixed.c | 20 ++++++++++++++ libavcodec/mdct_float.c | 20 ++++++++++++++ 10 files changed, 257 insertions(+), 52 deletions(-) create mode 100644 libavcodec/fft-internal.h create mode 100644 libavcodec/fft_fixed.c create mode 100644 libavcodec/fft_float.c create mode 100644 libavcodec/mdct_fixed.c create mode 100644 libavcodec/mdct_float.c diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 2dea2036a..4f3392fcf 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -31,15 +31,16 @@ OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o OBJS-$(CONFIG_DCT) += dct.o OBJS-$(CONFIG_DWT) += dwt.o OBJS-$(CONFIG_DXVA2) += dxva2.o -FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o -OBJS-$(CONFIG_FFT) += avfft.o fft.o $(FFT-OBJS-yes) +FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o +OBJS-$(CONFIG_FFT) += avfft.o fft_fixed.o fft_float.o \ + $(FFT-OBJS-yes) OBJS-$(CONFIG_GOLOMB) += golomb.o OBJS-$(CONFIG_H264DSP) += h264dsp.o h264idct.o OBJS-$(CONFIG_H264PRED) += h264pred.o OBJS-$(CONFIG_HUFFMAN) += huffman.o OBJS-$(CONFIG_LPC) += lpc.o OBJS-$(CONFIG_LSP) += lsp.o -OBJS-$(CONFIG_MDCT) += mdct.o +OBJS-$(CONFIG_MDCT) += mdct_fixed.o mdct_float.o RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) OBJS-$(CONFIG_SINEWIN) += sinewin.o @@ -672,6 +673,9 @@ $(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o $(SUBDIR)cos_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF) $(M)./$< > $@ +$(SUBDIR)cos_fixed_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF) + $(M)./$< cos fixed > $@ + $(SUBDIR)sin_tables.c: $(SUBDIR)costablegen$(HOSTEXESUF) $(M)./$< sin > $@ diff --git a/libavcodec/costablegen.c b/libavcodec/costablegen.c index 33afd8de2..65c492696 100644 --- a/libavcodec/costablegen.c +++ b/libavcodec/costablegen.c @@ -29,14 +29,33 @@ #endif #define BITS 16 #define FLOATFMT "%.18e" +#define FIXEDFMT "%6d" + +static int clip_f15(int v) +{ + return v < -32767 ? -32767 : + v > 32767 ? 32767 : + v; +} + +static void printval(double val, int fixed) +{ + if (fixed) + printf(" "FIXEDFMT",", clip_f15(lrint(val * (double)(1<<15)))); + else + printf(" "FLOATFMT",", val); + +} int main(int argc, char *argv[]) { int i, j; - int do_sin = argc == 2 && !strcmp(argv[1], "sin"); + int do_sin = argc > 1 && !strcmp(argv[1], "sin"); + int fixed = argc > 2 && !strcmp(argv[2], "fixed"); double (*func)(double) = do_sin ? sin : cos; printf("/* This file was generated by libavcodec/costablegen */\n"); + printf("#define CONFIG_FFT_FLOAT %d\n", !fixed); printf("#include \"libavcodec/%s\"\n", do_sin ? "rdft.h" : "fft.h"); for (i = 4; i <= BITS; i++) { int m = 1 << i; @@ -46,11 +65,12 @@ int main(int argc, char *argv[]) int idx = j > m/4 ? m/2 - j : j; if (do_sin && j >= m/4) idx = m/4 - j; - printf(" "FLOATFMT",", func(idx*freq)); + printval(func(idx*freq), fixed); if ((j & 3) == 3) printf("\n "); } - printf(" "FLOATFMT"\n};\n", func(do_sin ? -(m/4 - 1)*freq : freq)); + printval(func(do_sin ? -(m/4 - 1)*freq : freq), fixed); + printf("\n};\n"); } return 0; } diff --git a/libavcodec/fft-internal.h b/libavcodec/fft-internal.h new file mode 100644 index 000000000..1f240dbd1 --- /dev/null +++ b/libavcodec/fft-internal.h @@ -0,0 +1,67 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_FFT_INTERNAL_H +#define AVCODEC_FFT_INTERNAL_H + +#if CONFIG_FFT_FLOAT + +#define FIX15(v) (v) +#define sqrthalf (float)M_SQRT1_2 + +#define BF(x, y, a, b) do { \ + x = a - b; \ + y = a + b; \ + } while (0) + +#define CMUL(dre, dim, are, aim, bre, bim) do { \ + (dre) = (are) * (bre) - (aim) * (bim); \ + (dim) = (are) * (bim) + (aim) * (bre); \ + } while (0) + +#else + +#include "libavutil/intmath.h" +#include "mathops.h" + +#define SCALE_FLOAT(a, bits) lrint((a) * (double)(1 << (bits))) +#define FIX15(a) av_clip(SCALE_FLOAT(a, 15), -32767, 32767) + +#define sqrthalf ((int16_t)((1<<15)*M_SQRT1_2)) + +#define BF(x, y, a, b) do { \ + x = (a - b) >> 1; \ + y = (a + b) >> 1; \ + } while (0) + +#define CMUL(dre, dim, are, aim, bre, bim) do { \ + (dre) = (MUL16(are, bre) - MUL16(aim, bim)) >> 15; \ + (dim) = (MUL16(are, bim) + MUL16(aim, bre)) >> 15; \ + } while (0) + +#endif /* CONFIG_FFT_FLOAT */ + +#define ff_imdct_calc_c FFT_NAME(ff_imdct_calc_c) +#define ff_imdct_half_c FFT_NAME(ff_imdct_half_c) +#define ff_mdct_calc_c FFT_NAME(ff_mdct_calc_c) + +void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input); + +#endif /* AVCODEC_FFT_INTERNAL_H */ diff --git a/libavcodec/fft.c b/libavcodec/fft.c index 76e9c4139..2da5b43d5 100644 --- a/libavcodec/fft.c +++ b/libavcodec/fft.c @@ -30,6 +30,7 @@ #include #include "libavutil/mathematics.h" #include "fft.h" +#include "fft-internal.h" /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ #if !CONFIG_HARDCODED_TABLES @@ -47,10 +48,21 @@ COSTABLE(16384); COSTABLE(32768); COSTABLE(65536); #endif -COSTABLE_CONST FFTSample * const ff_cos_tabs[] = { +COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = { NULL, NULL, NULL, NULL, - ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024, - ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536, + FFT_NAME(ff_cos_16), + FFT_NAME(ff_cos_32), + FFT_NAME(ff_cos_64), + FFT_NAME(ff_cos_128), + FFT_NAME(ff_cos_256), + FFT_NAME(ff_cos_512), + FFT_NAME(ff_cos_1024), + FFT_NAME(ff_cos_2048), + FFT_NAME(ff_cos_4096), + FFT_NAME(ff_cos_8192), + FFT_NAME(ff_cos_16384), + FFT_NAME(ff_cos_32768), + FFT_NAME(ff_cos_65536), }; static void ff_fft_permute_c(FFTContext *s, FFTComplex *z); @@ -73,9 +85,9 @@ av_cold void ff_init_ff_cos_tabs(int index) int i; int m = 1<mdct_calc = ff_mdct_calc_c; #endif +#if CONFIG_FFT_FLOAT if (ARCH_ARM) ff_fft_init_arm(s); if (HAVE_ALTIVEC) ff_fft_init_altivec(s); if (HAVE_MMX) ff_fft_init_mmx(s); +#endif for(j=4; j<=nbits; j++) { ff_init_ff_cos_tabs(j); @@ -144,13 +158,6 @@ av_cold void ff_fft_end(FFTContext *s) av_freep(&s->tmp_buf); } -#define sqrthalf (float)M_SQRT1_2 - -#define BF(x,y,a,b) {\ - x = a - b;\ - y = a + b;\ -} - #define BUTTERFLIES(a0,a1,a2,a3) {\ BF(t3, t5, t5, t1);\ BF(a2.re, a0.re, a0.re, t5);\ @@ -174,10 +181,8 @@ av_cold void ff_fft_end(FFTContext *s) } #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ - t1 = a2.re * wre + a2.im * wim;\ - t2 = a2.im * wre - a2.re * wim;\ - t5 = a3.re * wre - a3.im * wim;\ - t6 = a3.im * wre + a3.re * wim;\ + CMUL(t1, t2, a2.re, a2.im, wre, -wim);\ + CMUL(t5, t6, a3.re, a3.im, wre, wim);\ BUTTERFLIES(a0,a1,a2,a3)\ } @@ -193,7 +198,7 @@ av_cold void ff_fft_end(FFTContext *s) #define PASS(name)\ static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\ {\ - FFTSample t1, t2, t3, t4, t5, t6;\ + FFTDouble t1, t2, t3, t4, t5, t6;\ int o1 = 2*n;\ int o2 = 4*n;\ int o3 = 6*n;\ @@ -222,12 +227,12 @@ static void fft##n(FFTComplex *z)\ fft##n2(z);\ fft##n4(z+n4*2);\ fft##n4(z+n4*3);\ - pass(z,ff_cos_##n,n4/2);\ + pass(z,FFT_NAME(ff_cos_##n),n4/2);\ } static void fft4(FFTComplex *z) { - FFTSample t1, t2, t3, t4, t5, t6, t7, t8; + FFTDouble t1, t2, t3, t4, t5, t6, t7, t8; BF(t3, t1, z[0].re, z[1].re); BF(t8, t6, z[3].re, z[2].re); @@ -241,7 +246,7 @@ static void fft4(FFTComplex *z) static void fft8(FFTComplex *z) { - FFTSample t1, t2, t3, t4, t5, t6, t7, t8; + FFTDouble t1, t2, t3, t4, t5, t6, t7, t8; fft4(z); @@ -262,7 +267,9 @@ static void fft8(FFTComplex *z) #if !CONFIG_SMALL static void fft16(FFTComplex *z) { - FFTSample t1, t2, t3, t4, t5, t6; + FFTDouble t1, t2, t3, t4, t5, t6; + FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1]; + FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3]; fft8(z); fft4(z+8); @@ -270,8 +277,8 @@ static void fft16(FFTComplex *z) TRANSFORM_ZERO(z[0],z[4],z[8],z[12]); TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf); - TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]); - TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]); + TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3); + TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1); } #else DECL_FFT(16,8,4) diff --git a/libavcodec/fft.h b/libavcodec/fft.h index 36be9962a..2f13e5fdb 100644 --- a/libavcodec/fft.h +++ b/libavcodec/fft.h @@ -22,11 +22,37 @@ #ifndef AVCODEC_FFT_H #define AVCODEC_FFT_H +#ifndef CONFIG_FFT_FLOAT +#define CONFIG_FFT_FLOAT 1 +#endif + #include #include "config.h" #include "libavutil/mem.h" + +#if CONFIG_FFT_FLOAT + #include "avfft.h" +#define FFT_NAME(x) x + +typedef float FFTDouble; + +#else + +#define FFT_NAME(x) x ## _fixed + +typedef int16_t FFTSample; +typedef int FFTDouble; + +typedef struct FFTComplex { + int16_t re, im; +} FFTComplex; + +typedef struct FFTContext FFTContext; + +#endif /* CONFIG_FFT_FLOAT */ + /* FFT computation */ struct FFTContext { @@ -66,7 +92,7 @@ struct FFTContext { #endif #define COSTABLE(size) \ - COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_cos_##size)[size/2] + COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, FFT_NAME(ff_cos_##size))[size/2] extern COSTABLE(16); extern COSTABLE(32); @@ -81,7 +107,9 @@ extern COSTABLE(8192); extern COSTABLE(16384); extern COSTABLE(32768); extern COSTABLE(65536); -extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17]; +extern COSTABLE_CONST FFTSample* const FFT_NAME(ff_cos_tabs)[17]; + +#define ff_init_ff_cos_tabs FFT_NAME(ff_init_ff_cos_tabs) /** * Initialize the cosine table in ff_cos_tabs[index] @@ -89,6 +117,9 @@ extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17]; */ void ff_init_ff_cos_tabs(int index); +#define ff_fft_init FFT_NAME(ff_fft_init) +#define ff_fft_end FFT_NAME(ff_fft_end) + /** * Set up a complex FFT. * @param nbits log2 of the length of the input array @@ -102,10 +133,10 @@ void ff_fft_init_arm(FFTContext *s); void ff_fft_end(FFTContext *s); +#define ff_mdct_init FFT_NAME(ff_mdct_init) +#define ff_mdct_end FFT_NAME(ff_mdct_end) + int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale); -void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_mdct_end(FFTContext *s); #endif /* AVCODEC_FFT_H */ diff --git a/libavcodec/fft_fixed.c b/libavcodec/fft_fixed.c new file mode 100644 index 000000000..b28091d35 --- /dev/null +++ b/libavcodec/fft_fixed.c @@ -0,0 +1,20 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define CONFIG_FFT_FLOAT 0 +#include "fft.c" diff --git a/libavcodec/fft_float.c b/libavcodec/fft_float.c new file mode 100644 index 000000000..24c9fdb36 --- /dev/null +++ b/libavcodec/fft_float.c @@ -0,0 +1,20 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define CONFIG_FFT_FLOAT 1 +#include "fft.c" diff --git a/libavcodec/mdct.c b/libavcodec/mdct.c index 9edb57760..6f6453427 100644 --- a/libavcodec/mdct.c +++ b/libavcodec/mdct.c @@ -24,12 +24,19 @@ #include "libavutil/common.h" #include "libavutil/mathematics.h" #include "fft.h" +#include "fft-internal.h" /** * @file * MDCT/IMDCT transforms. */ +#if CONFIG_FFT_FLOAT +# define RSCALE(x) (x) +#else +# define RSCALE(x) ((x) >> 1) +#endif + /** * init MDCT or IMDCT computation. */ @@ -70,8 +77,8 @@ av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale) scale = sqrt(fabs(scale)); for(i=0;itcos[i*tstep] = -cos(alpha) * scale; - s->tsin[i*tstep] = -sin(alpha) * scale; + s->tcos[i*tstep] = FIX15(-cos(alpha) * scale); + s->tsin[i*tstep] = FIX15(-sin(alpha) * scale); } return 0; fail: @@ -79,17 +86,6 @@ av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale) return -1; } -/* complex multiplication: p = a * b */ -#define CMUL(pre, pim, are, aim, bre, bim) \ -{\ - FFTSample _are = (are);\ - FFTSample _aim = (aim);\ - FFTSample _bre = (bre);\ - FFTSample _bim = (bim);\ - (pre) = _are * _bre - _aim * _bim;\ - (pim) = _are * _bim + _aim * _bre;\ -} - /** * Compute the middle half of the inverse MDCT of size N = 2^nbits, * thus excluding the parts that can be derived by symmetry @@ -161,7 +157,7 @@ void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input) void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input) { int i, j, n, n8, n4, n2, n3; - FFTSample re, im; + FFTDouble re, im; const uint16_t *revtab = s->revtab; const FFTSample *tcos = s->tcos; const FFTSample *tsin = s->tsin; @@ -175,13 +171,13 @@ void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input) /* pre rotation */ for(i=0;i