From: Ian Braithwaite Date: Mon, 26 Mar 2007 10:03:57 +0000 (+0000) Subject: Optimize by building the mdct window and multiplying/adding at the same time. X-Git-Tag: v0.5~9520 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=1890c2acf48eb6471d812915db092e32ed5c9b14;p=coroid%2Fffmpeg_saccubus.git Optimize by building the mdct window and multiplying/adding at the same time. Patch by Ian Braithwaite ian .. braithwaite . dk [Ffmpeg-devel] WMA decoder speedup 2007-03-22 22:56 Originally committed as revision 8526 to svn://svn.ffmpeg.org/ffmpeg/trunk --- diff --git a/libavcodec/wma.c b/libavcodec/wma.c index 76f04def7..2241a07a0 100644 --- a/libavcodec/wma.c +++ b/libavcodec/wma.c @@ -302,7 +302,7 @@ int ff_wma_init(AVCodecContext * avctx, int flags2) window = av_malloc(sizeof(float) * n); alpha = M_PI / (2.0 * n); for(j=0;j<n;j++) { - window[n - j - 1] = sin((j + 0.5) * alpha); + window[j] = sin((j + 0.5) * alpha); } s->windows[i] = window; } diff --git a/libavcodec/wma.h b/libavcodec/wma.h index 17c62687e..cd4daa7e5 100644 --- a/libavcodec/wma.h +++ b/libavcodec/wma.h @@ -112,7 +112,6 @@ typedef struct WMACodecContext { int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]); DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); - DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]); MDCTContext mdct_ctx[BLOCK_NB_SIZES]; float *windows[BLOCK_NB_SIZES]; DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); ///< temporary storage for imdct diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c index 963698e4f..ef3cc7a33 100644 --- a/libavcodec/wmadec.c +++ b/libavcodec/wmadec.c @@ -316,6 +316,61 @@ static int decode_exp_vlc(WMACodecContext *s, int ch) return 0; } + +/** + * Apply MDCT window and add into output. + * + * We ensure that when the windows overlap their squared sum + * is always 1 (MDCT reconstruction rule).
+ */ +static void wma_window(WMACodecContext *s, float *out) +{ + float *in = s->output; + int block_len, bsize, n; + + /* left part */ + if (s->block_len_bits <= s->prev_block_len_bits) { + block_len = s->block_len; + bsize = s->frame_len_bits - s->block_len_bits; + + s->dsp.vector_fmul_add_add(out, in, s->windows[bsize], + out, 0, block_len, 1); + + } else { + block_len = 1 << s->prev_block_len_bits; + n = (s->block_len - block_len) / 2; + bsize = s->frame_len_bits - s->prev_block_len_bits; + + s->dsp.vector_fmul_add_add(out+n, in+n, s->windows[bsize], + out+n, 0, block_len, 1); + + memcpy(out+n+block_len, in+n+block_len, n*sizeof(float)); + } + + out += s->block_len; + in += s->block_len; + + /* right part */ + if (s->block_len_bits <= s->next_block_len_bits) { + block_len = s->block_len; + bsize = s->frame_len_bits - s->block_len_bits; + + s->dsp.vector_fmul_reverse(out, in, s->windows[bsize], block_len); + + } else { + block_len = 1 << s->next_block_len_bits; + n = (s->block_len - block_len) / 2; + bsize = s->frame_len_bits - s->next_block_len_bits; + + memcpy(out, in, n*sizeof(float)); + + s->dsp.vector_fmul_reverse(out+n, in+n, s->windows[bsize], block_len); + + memset(out+n+block_len, 0, n*sizeof(float)); + } +} + + /** * @return 0 if OK. 1 if last block of frame. return -1 if * unrecorrable error. 
@@ -657,54 +712,8 @@ static int wma_decode_block(WMACodecContext *s) } } - /* build the window : we ensure that when the windows overlap - their squared sum is always 1 (MDCT reconstruction rule) */ - /* XXX: merge with output */ - { - int i, next_block_len, block_len, prev_block_len, n; - float *wptr; - - block_len = s->block_len; - prev_block_len = 1 << s->prev_block_len_bits; - next_block_len = 1 << s->next_block_len_bits; - - /* right part */ - wptr = s->window + block_len; - if (block_len <= next_block_len) { - for(i=0;i<block_len;i++) - *wptr++ = s->windows[bsize][i]; - } else { - /* overlap */ - n = (block_len / 2) - (next_block_len / 2); - for(i=0;i<n;i++) - *wptr++ = 1.0; - for(i=0;i<next_block_len;i++) - *wptr++ = s->windows[s->frame_len_bits - s->next_block_len_bits][i]; - for(i=0;i<n;i++) - *wptr++ = 0.0; - } - - /* left part */ - wptr = s->window + block_len; - if (block_len <= prev_block_len) { - for(i=0;i<block_len;i++) - *--wptr = s->windows[bsize][i]; - } else { - /* overlap */ - n = (block_len / 2) - (prev_block_len / 2); - for(i=0;i<n;i++) - *--wptr = 1.0; - for(i=0;i<prev_block_len;i++) - *--wptr = s->windows[s->frame_len_bits - s->prev_block_len_bits][i]; - for(i=0;i<n;i++) - *--wptr = 0.0; - } - } - - for(ch = 0; ch < s->nb_channels; ch++) { if (s->channel_coded[ch]) { - float *ptr; int n4, index, n; n = s->block_len; @@ -712,19 +721,14 @@ static int wma_decode_block(WMACodecContext *s) s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], s->output, s->coefs[ch], s->mdct_tmp); - /* XXX: optimize all that by build the window and - multipying/adding at the same time */ - /* multiply by the window and add in the frame */ index = (s->frame_len / 2) + s->block_pos - n4; - ptr = &s->frame_out[ch][index]; - s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); + wma_window(s, &s->frame_out[ch][index]); /* specific fast case for ms-stereo : add to second channel if it is not coded */ if (s->ms_stereo && !s->channel_coded[1]) { - ptr = &s->frame_out[1][index]; - s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); + wma_window(s, &s->frame_out[1][index]); } } } @@ -779,9 +783,6 @@ static int wma_decode_frame(WMACodecContext *s, int16_t *samples) /* prepare for next block */ memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
s->frame_len * sizeof(float)); - /* XXX: suppress this */ - memset(&s->frame_out[ch][s->frame_len], 0, - s->frame_len * sizeof(float)); } #ifdef TRACE diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c index 4fe3083b8..e902a6aff 100644 --- a/libavcodec/wmaenc.c +++ b/libavcodec/wmaenc.c @@ -92,8 +92,8 @@ static void apply_window_and_mdct(AVCodecContext * avctx, signed short * audio, memcpy(s->output, s->frame_out[channel], sizeof(float)*window_len); j = channel; for (i = 0; i < len; i++, j += avctx->channels){ - s->output[i+window_len] = audio[j] / n * win[i]; - s->frame_out[channel][i] = audio[j] / n * win[window_len - i - 1]; + s->output[i+window_len] = audio[j] / n * win[window_len - i - 1]; + s->frame_out[channel][i] = audio[j] / n * win[i]; } ff_mdct_calc(&s->mdct_ctx[window_index], s->coefs[channel], s->output, s->mdct_tmp); }