From: Michael Niedermayer
Date: Sun, 26 Aug 2007 01:11:02 +0000 (+0000)
Subject: avoid overflow in the 3rd lifting step, this now needs mmx2 at minimum
X-Git-Tag: v0.5~7762
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=629750290f6122a72e68c34cf94f521a90def2ef;p=coroid%2Flibav_saccubus.git

avoid overflow in the 3rd lifting step, this now needs mmx2 at minimum
(patch for plain mmx support is welcome ...)

Originally committed as revision 10226 to svn://svn.ffmpeg.org/ffmpeg/trunk
---

diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 3ebf1488f..191688ce2 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -3627,8 +3627,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
         }
         else{
+            if(mm_flags & MM_MMXEXT){
             c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
             c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
+            }
             c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
         }
 #endif
diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/i386/snowdsp_mmx.c
index 6deaad233..e7f4b2953 100644
--- a/libavcodec/i386/snowdsp_mmx.c
+++ b/libavcodec/i386/snowdsp_mmx.c
@@ -111,22 +111,29 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
         i = 0;
         asm volatile(
-            "psllw $2, %%xmm7 \n\t"
+            "psllw $13, %%xmm7 \n\t"
+            "pcmpeqw %%xmm6, %%xmm6 \n\t"
+            "psrlw $13, %%xmm6 \n\t"
+            "paddw %%xmm7, %%xmm6 \n\t"
         ::);
         for(; i> W_BS);
         asm volatile(
-            "psllw $2, %%mm7 \n\t"
+            "psllw $13, %%mm7 \n\t"
+            "pcmpeqw %%mm6, %%mm6 \n\t"
+            "psrlw $13, %%mm6 \n\t"
+            "paddw %%mm7, %%mm6 \n\t"
         ::);
         for(; i