From 53b572118ed9f88219e94f08ef7bff79882247ae Mon Sep 17 00:00:00 2001
From: =?utf8?q?M=C3=A5ns=20Rullg=C3=A5rd?=
Date: Tue, 22 Sep 2009 00:48:41 +0000
Subject: [PATCH] Add some dsputil functions useful for AAC decoder

Originally committed as revision 19955 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/dsputil.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/dsputil.h | 50 +++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 773d47f80..270c58362 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -4089,6 +4089,80 @@ void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, c
     }
 }
 
+static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
+                                 int len)
+{
+    int i;
+    for (i = 0; i < len; i++)
+        dst[i] = src[i] * mul;
+}
+
+static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
+                                      const float **sv, float mul, int len)
+{
+    int i;
+    for (i = 0; i < len; i += 2, sv++) {
+        dst[i  ] = src[i  ] * sv[0][0] * mul;
+        dst[i+1] = src[i+1] * sv[0][1] * mul;
+    }
+}
+
+static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
+                                      const float **sv, float mul, int len)
+{
+    int i;
+    for (i = 0; i < len; i += 4, sv++) {
+        dst[i  ] = src[i  ] * sv[0][0] * mul;
+        dst[i+1] = src[i+1] * sv[0][1] * mul;
+        dst[i+2] = src[i+2] * sv[0][2] * mul;
+        dst[i+3] = src[i+3] * sv[0][3] * mul;
+    }
+}
+
+static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
+                               int len)
+{
+    int i;
+    for (i = 0; i < len; i += 2, sv++) {
+        dst[i  ] = sv[0][0] * mul;
+        dst[i+1] = sv[0][1] * mul;
+    }
+}
+
+static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
+                               int len)
+{
+    int i;
+    for (i = 0; i < len; i += 4, sv++) {
+        dst[i  ] = sv[0][0] * mul;
+        dst[i+1] = sv[0][1] * mul;
+        dst[i+2] = sv[0][2] * mul;
+        dst[i+3] = sv[0][3] * mul;
+    }
+}
+
+static void butterflies_float_c(float *restrict v1, float *restrict v2,
+                                int len)
+{
+    int i;
+    for (i = 0; i < len; i++) {
+        float t = v1[i] - v2[i];
+        v1[i] += v2[i];
+        v2[i] = t;
+    }
+}
+
+static float scalarproduct_float_c(const float *v1, const float *v2, int len)
+{
+    float p = 0.0;
+    int i;
+
+    for (i = 0; i < len; i++)
+        p += v1[i] * v2[i];
+
+    return p;
+}
+
 static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
     int i;
     for(i=0; i<len; i++)
@@ -?,6 +?,15 @@
     c->add_int16 = add_int16_c;
     c->sub_int16 = sub_int16_c;
     c->scalarproduct_int16 = scalarproduct_int16_c;
+    c->scalarproduct_float = scalarproduct_float_c;
+    c->butterflies_float = butterflies_float_c;
+    c->vector_fmul_scalar = vector_fmul_scalar_c;
+
+    c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
+    c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;
+
+    c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
+    c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
 
     c->shrink[0]= ff_img_copy_plane;
     c->shrink[1]= ff_shrink22;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 88a0ee866..58d5b49bd 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -397,6 +397,56 @@ typedef struct DSPContext {
     /* assume len is a multiple of 8, and arrays are 16-byte aligned */
     void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
     void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
+    /**
+     * Multiply a vector of floats by a scalar float. Source and
+     * destination vectors must overlap exactly or not at all.
+     * @param dst result vector, 16-byte aligned
+     * @param src input vector, 16-byte aligned
+     * @param mul scalar value
+     * @param len length of vector, multiple of 4
+     */
+    void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
+                               int len);
+    /**
+     * Multiply a vector of floats by concatenated short vectors of
+     * floats and by a scalar float. Source and destination vectors
+     * must overlap exactly or not at all.
+     * [0]: short vectors of length 2, 8-byte aligned
+     * [1]: short vectors of length 4, 16-byte aligned
+     * @param dst output vector, 16-byte aligned
+     * @param src input vector, 16-byte aligned
+     * @param sv array of pointers to short vectors
+     * @param mul scalar value
+     * @param len number of elements in src and dst, multiple of 4
+     */
+    void (*vector_fmul_sv_scalar[2])(float *dst, const float *src,
+                                     const float **sv, float mul, int len);
+    /**
+     * Multiply short vectors of floats by a scalar float, store
+     * concatenated result.
+     * [0]: short vectors of length 2, 8-byte aligned
+     * [1]: short vectors of length 4, 16-byte aligned
+     * @param dst output vector, 16-byte aligned
+     * @param sv array of pointers to short vectors
+     * @param mul scalar value
+     * @param len number of output elements, multiple of 4
+     */
+    void (*sv_fmul_scalar[2])(float *dst, const float **sv,
+                              float mul, int len);
+    /**
+     * Calculate the scalar product of two vectors of floats.
+     * @param v1 first vector, 16-byte aligned
+     * @param v2 second vector, 16-byte aligned
+     * @param len length of vectors, multiple of 4
+     */
+    float (*scalarproduct_float)(const float *v1, const float *v2, int len);
+    /**
+     * Calculate the sum and difference of two vectors of floats.
+     * @param v1 first input vector, sum output, 16-byte aligned
+     * @param v2 second input vector, difference output, 16-byte aligned
+     * @param len length of vectors, multiple of 4
+     */
+    void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
 
     /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
      * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
-- 
2.11.0