OSDN Git Service

Merge commit 'dfc50ac85e9d68a771b556297b7c411650206f3b'
author Michael Niedermayer <michaelni@gmx.at>
Fri, 20 Dec 2013 22:44:31 +0000 (23:44 +0100)
committer Michael Niedermayer <michaelni@gmx.at>
Fri, 20 Dec 2013 22:44:31 +0000 (23:44 +0100)
* commit 'dfc50ac85e9d68a771b556297b7c411650206f3b':
  x86: mpegvideo: move denoise_dct asm to mpegvideoenc

Conflicts:
libavcodec/x86/mpegvideo.c
libavcodec/x86/mpegvideoenc.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
1  2 
libavcodec/x86/mpegvideo.c
libavcodec/x86/mpegvideoenc.c

@@@ -443,117 -443,7 +443,7 @@@ __asm__ volatile
          );
  }
  
- static void  denoise_dct_mmx(MpegEncContext *s, int16_t *block){
-     const int intra= s->mb_intra;
-     int *sum= s->dct_error_sum[intra];
-     uint16_t *offset= s->dct_offset[intra];
-     s->dct_count[intra]++;
-     __asm__ volatile(
-         "pxor %%mm7, %%mm7                      \n\t"
-         "1:                                     \n\t"
-         "pxor %%mm0, %%mm0                      \n\t"
-         "pxor %%mm1, %%mm1                      \n\t"
-         "movq (%0), %%mm2                       \n\t"
-         "movq 8(%0), %%mm3                      \n\t"
-         "pcmpgtw %%mm2, %%mm0                   \n\t"
-         "pcmpgtw %%mm3, %%mm1                   \n\t"
-         "pxor %%mm0, %%mm2                      \n\t"
-         "pxor %%mm1, %%mm3                      \n\t"
-         "psubw %%mm0, %%mm2                     \n\t"
-         "psubw %%mm1, %%mm3                     \n\t"
-         "movq %%mm2, %%mm4                      \n\t"
-         "movq %%mm3, %%mm5                      \n\t"
-         "psubusw (%2), %%mm2                    \n\t"
-         "psubusw 8(%2), %%mm3                   \n\t"
-         "pxor %%mm0, %%mm2                      \n\t"
-         "pxor %%mm1, %%mm3                      \n\t"
-         "psubw %%mm0, %%mm2                     \n\t"
-         "psubw %%mm1, %%mm3                     \n\t"
-         "movq %%mm2, (%0)                       \n\t"
-         "movq %%mm3, 8(%0)                      \n\t"
-         "movq %%mm4, %%mm2                      \n\t"
-         "movq %%mm5, %%mm3                      \n\t"
-         "punpcklwd %%mm7, %%mm4                 \n\t"
-         "punpckhwd %%mm7, %%mm2                 \n\t"
-         "punpcklwd %%mm7, %%mm5                 \n\t"
-         "punpckhwd %%mm7, %%mm3                 \n\t"
-         "paddd (%1), %%mm4                      \n\t"
-         "paddd 8(%1), %%mm2                     \n\t"
-         "paddd 16(%1), %%mm5                    \n\t"
-         "paddd 24(%1), %%mm3                    \n\t"
-         "movq %%mm4, (%1)                       \n\t"
-         "movq %%mm2, 8(%1)                      \n\t"
-         "movq %%mm5, 16(%1)                     \n\t"
-         "movq %%mm3, 24(%1)                     \n\t"
-         "add $16, %0                            \n\t"
-         "add $32, %1                            \n\t"
-         "add $16, %2                            \n\t"
-         "cmp %3, %0                             \n\t"
-             " jb 1b                             \n\t"
-         : "+r" (block), "+r" (sum), "+r" (offset)
-         : "r"(block+64)
-     );
- }
- static void  denoise_dct_sse2(MpegEncContext *s, int16_t *block){
-     const int intra= s->mb_intra;
-     int *sum= s->dct_error_sum[intra];
-     uint16_t *offset= s->dct_offset[intra];
-     s->dct_count[intra]++;
-     __asm__ volatile(
-         "pxor %%xmm7, %%xmm7                    \n\t"
-         "1:                                     \n\t"
-         "pxor %%xmm0, %%xmm0                    \n\t"
-         "pxor %%xmm1, %%xmm1                    \n\t"
-         "movdqa (%0), %%xmm2                    \n\t"
-         "movdqa 16(%0), %%xmm3                  \n\t"
-         "pcmpgtw %%xmm2, %%xmm0                 \n\t"
-         "pcmpgtw %%xmm3, %%xmm1                 \n\t"
-         "pxor %%xmm0, %%xmm2                    \n\t"
-         "pxor %%xmm1, %%xmm3                    \n\t"
-         "psubw %%xmm0, %%xmm2                   \n\t"
-         "psubw %%xmm1, %%xmm3                   \n\t"
-         "movdqa %%xmm2, %%xmm4                  \n\t"
-         "movdqa %%xmm3, %%xmm5                  \n\t"
-         "psubusw (%2), %%xmm2                   \n\t"
-         "psubusw 16(%2), %%xmm3                 \n\t"
-         "pxor %%xmm0, %%xmm2                    \n\t"
-         "pxor %%xmm1, %%xmm3                    \n\t"
-         "psubw %%xmm0, %%xmm2                   \n\t"
-         "psubw %%xmm1, %%xmm3                   \n\t"
-         "movdqa %%xmm2, (%0)                    \n\t"
-         "movdqa %%xmm3, 16(%0)                  \n\t"
-         "movdqa %%xmm4, %%xmm6                  \n\t"
-         "movdqa %%xmm5, %%xmm0                  \n\t"
-         "punpcklwd %%xmm7, %%xmm4               \n\t"
-         "punpckhwd %%xmm7, %%xmm6               \n\t"
-         "punpcklwd %%xmm7, %%xmm5               \n\t"
-         "punpckhwd %%xmm7, %%xmm0               \n\t"
-         "paddd (%1), %%xmm4                     \n\t"
-         "paddd 16(%1), %%xmm6                   \n\t"
-         "paddd 32(%1), %%xmm5                   \n\t"
-         "paddd 48(%1), %%xmm0                   \n\t"
-         "movdqa %%xmm4, (%1)                    \n\t"
-         "movdqa %%xmm6, 16(%1)                  \n\t"
-         "movdqa %%xmm5, 32(%1)                  \n\t"
-         "movdqa %%xmm0, 48(%1)                  \n\t"
-         "add $32, %0                            \n\t"
-         "add $64, %1                            \n\t"
-         "add $32, %2                            \n\t"
-         "cmp %3, %0                             \n\t"
-             " jb 1b                             \n\t"
-         : "+r" (block), "+r" (sum), "+r" (offset)
-         : "r"(block+64)
-           XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
-                             "%xmm4", "%xmm5", "%xmm6", "%xmm7")
-     );
- }
 -#endif /* HAVE_INLINE_ASM */
 +#endif /* HAVE_MMX_INLINE */
  
  av_cold void ff_MPV_common_init_x86(MpegEncContext *s)
  {
          if(!(s->flags & CODEC_FLAG_BITEXACT))
              s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
          s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
-         s->denoise_dct = denoise_dct_mmx;
-     }
-     if (INLINE_SSE2(cpu_flags)) {
-         s->denoise_dct = denoise_dct_sse2;
      }
 -#endif /* HAVE_INLINE_ASM */
 +#endif /* HAVE_MMX_INLINE */
  }
@@@ -82,7 -82,119 +82,119 @@@ DECLARE_ALIGNED(16, static uint16_t, in
  #include "mpegvideoenc_template.c"
  #endif /* HAVE_SSSE3_INLINE */
  
 -av_cold void ff_MPV_encode_init_x86(MpegEncContext *s)
+ #if HAVE_INLINE_ASM
+ static void  denoise_dct_mmx(MpegEncContext *s, int16_t *block){ /* MMX pass over the 64 int16 coefficients at block, in place: each magnitude is reduced by the matching offset[] entry (clamped at 0, sign kept) and the original magnitude is added into sum[]. */
+     const int intra= s->mb_intra; /* index selecting which set of tables/counters is used below */
+     int *sum= s->dct_error_sum[intra]; /* accumulators, updated as dwords by the paddd/movq pairs */
+     uint16_t *offset= s->dct_offset[intra]; /* 16-bit amounts subtracted from each magnitude */
+     s->dct_count[intra]++; /* bump the counter of the selected set once per call */
+     __asm__ volatile(
+         "pxor %%mm7, %%mm7                      \n\t" /* mm7 = 0, the zero half for the unpacks below */
+         "1:                                     \n\t" /* loop head: 8 coefficients per pass */
+         "pxor %%mm0, %%mm0                      \n\t" /* clear mm0 before building the sign mask */
+         "pxor %%mm1, %%mm1                      \n\t" /* clear mm1 before building the sign mask */
+         "movq (%0), %%mm2                       \n\t" /* mm2 = coefficients 0..3 */
+         "movq 8(%0), %%mm3                      \n\t" /* mm3 = coefficients 4..7 */
+         "pcmpgtw %%mm2, %%mm0                   \n\t" /* mm0 = 0xFFFF in each word where 0 > coefficient */
+         "pcmpgtw %%mm3, %%mm1                   \n\t" /* same mask for coefficients 4..7 */
+         "pxor %%mm0, %%mm2                      \n\t" /* (x ^ mask) ... */
+         "pxor %%mm1, %%mm3                      \n\t"
+         "psubw %%mm0, %%mm2                     \n\t" /* ... - mask  ==> |x| */
+         "psubw %%mm1, %%mm3                     \n\t"
+         "movq %%mm2, %%mm4                      \n\t" /* keep |x| for the accumulation step */
+         "movq %%mm3, %%mm5                      \n\t"
+         "psubusw (%2), %%mm2                    \n\t" /* |x| - offset; unsigned saturation clamps at 0 */
+         "psubusw 8(%2), %%mm3                   \n\t"
+         "pxor %%mm0, %%mm2                      \n\t" /* reapply the original sign: (v ^ mask) - mask */
+         "pxor %%mm1, %%mm3                      \n\t"
+         "psubw %%mm0, %%mm2                     \n\t"
+         "psubw %%mm1, %%mm3                     \n\t"
+         "movq %%mm2, (%0)                       \n\t" /* store the reduced coefficients back into block */
+         "movq %%mm3, 8(%0)                      \n\t"
+         "movq %%mm4, %%mm2                      \n\t" /* second copy of |x|: one register per unpack half */
+         "movq %%mm5, %%mm3                      \n\t"
+         "punpcklwd %%mm7, %%mm4                 \n\t" /* zero-extend |x| of coefficients 0,1 to dwords */
+         "punpckhwd %%mm7, %%mm2                 \n\t" /* coefficients 2,3 */
+         "punpcklwd %%mm7, %%mm5                 \n\t" /* coefficients 4,5 */
+         "punpckhwd %%mm7, %%mm3                 \n\t" /* coefficients 6,7 */
+         "paddd (%1), %%mm4                      \n\t" /* add the current sum[] entries to the magnitudes */
+         "paddd 8(%1), %%mm2                     \n\t"
+         "paddd 16(%1), %%mm5                    \n\t"
+         "paddd 24(%1), %%mm3                    \n\t"
+         "movq %%mm4, (%1)                       \n\t" /* write the updated sums back */
+         "movq %%mm2, 8(%1)                      \n\t"
+         "movq %%mm5, 16(%1)                     \n\t"
+         "movq %%mm3, 24(%1)                     \n\t"
+         "add $16, %0                            \n\t" /* block  += 16 bytes (8 coefficients) */
+         "add $32, %1                            \n\t" /* sum    += 32 bytes (8 dwords) */
+         "add $16, %2                            \n\t" /* offset += 16 bytes (8 entries) */
+         "cmp %3, %0                             \n\t" /* compare against the end pointer */
+             " jb 1b                             \n\t" /* loop while block is below block+64 */
+         : "+r" (block), "+r" (sum), "+r" (offset) /* read-write: all three pointers are advanced by the asm */
+         : "r"(block+64) /* end pointer, evaluated before the asm runs */
+     ); /* NOTE(review): no clobber list here although the asm stores through %0/%1 and uses mm0-mm5 and mm7 -- confirm whether a "memory" clobber is wanted */
+ }
+ static void  denoise_dct_sse2(MpegEncContext *s, int16_t *block){ /* SSE2 pass over the 64 int16 coefficients at block, in place: each magnitude is reduced by the matching offset[] entry (clamped at 0, sign kept) and the original magnitude is added into sum[]. */
+     const int intra= s->mb_intra; /* index selecting which set of tables/counters is used below */
+     int *sum= s->dct_error_sum[intra]; /* accumulators, updated as dwords by the paddd/movdqa pairs */
+     uint16_t *offset= s->dct_offset[intra]; /* 16-bit amounts subtracted from each magnitude */
+     s->dct_count[intra]++; /* bump the counter of the selected set once per call */
+     __asm__ volatile(
+         "pxor %%xmm7, %%xmm7                    \n\t" /* xmm7 = 0, the zero half for the unpacks below */
+         "1:                                     \n\t" /* loop head: 16 coefficients per pass */
+         "pxor %%xmm0, %%xmm0                    \n\t" /* clear xmm0 before building the sign mask */
+         "pxor %%xmm1, %%xmm1                    \n\t" /* clear xmm1 before building the sign mask */
+         "movdqa (%0), %%xmm2                    \n\t" /* xmm2 = coefficients 0..7; movdqa is the aligned form, so block presumably must be 16-byte aligned -- confirm with callers */
+         "movdqa 16(%0), %%xmm3                  \n\t" /* xmm3 = coefficients 8..15 */
+         "pcmpgtw %%xmm2, %%xmm0                 \n\t" /* xmm0 = 0xFFFF in each word where 0 > coefficient */
+         "pcmpgtw %%xmm3, %%xmm1                 \n\t" /* same mask for coefficients 8..15 */
+         "pxor %%xmm0, %%xmm2                    \n\t" /* (x ^ mask) ... */
+         "pxor %%xmm1, %%xmm3                    \n\t"
+         "psubw %%xmm0, %%xmm2                   \n\t" /* ... - mask  ==> |x| */
+         "psubw %%xmm1, %%xmm3                   \n\t"
+         "movdqa %%xmm2, %%xmm4                  \n\t" /* keep |x| for the accumulation step */
+         "movdqa %%xmm3, %%xmm5                  \n\t"
+         "psubusw (%2), %%xmm2                   \n\t" /* |x| - offset; unsigned saturation clamps at 0 */
+         "psubusw 16(%2), %%xmm3                 \n\t"
+         "pxor %%xmm0, %%xmm2                    \n\t" /* reapply the original sign: (v ^ mask) - mask */
+         "pxor %%xmm1, %%xmm3                    \n\t"
+         "psubw %%xmm0, %%xmm2                   \n\t" /* last use of xmm0 as a sign mask in this pass */
+         "psubw %%xmm1, %%xmm3                   \n\t"
+         "movdqa %%xmm2, (%0)                    \n\t" /* store the reduced coefficients back into block */
+         "movdqa %%xmm3, 16(%0)                  \n\t"
+         "movdqa %%xmm4, %%xmm6                  \n\t" /* second copy of |x|: one register per unpack half */
+         "movdqa %%xmm5, %%xmm0                  \n\t" /* xmm0 is reused as scratch; it is cleared again at the loop head */
+         "punpcklwd %%xmm7, %%xmm4               \n\t" /* zero-extend |x| of coefficients 0..3 to dwords */
+         "punpckhwd %%xmm7, %%xmm6               \n\t" /* coefficients 4..7 */
+         "punpcklwd %%xmm7, %%xmm5               \n\t" /* coefficients 8..11 */
+         "punpckhwd %%xmm7, %%xmm0               \n\t" /* coefficients 12..15 */
+         "paddd (%1), %%xmm4                     \n\t" /* add the current sum[] entries to the magnitudes */
+         "paddd 16(%1), %%xmm6                   \n\t"
+         "paddd 32(%1), %%xmm5                   \n\t"
+         "paddd 48(%1), %%xmm0                   \n\t"
+         "movdqa %%xmm4, (%1)                    \n\t" /* write the updated sums back */
+         "movdqa %%xmm6, 16(%1)                  \n\t"
+         "movdqa %%xmm5, 32(%1)                  \n\t"
+         "movdqa %%xmm0, 48(%1)                  \n\t"
+         "add $32, %0                            \n\t" /* block  += 32 bytes (16 coefficients) */
+         "add $64, %1                            \n\t" /* sum    += 64 bytes (16 dwords) */
+         "add $32, %2                            \n\t" /* offset += 32 bytes (16 entries) */
+         "cmp %3, %0                             \n\t" /* compare against the end pointer */
+             " jb 1b                             \n\t" /* loop while block is below block+64 */
+         : "+r" (block), "+r" (sum), "+r" (offset) /* read-write: all three pointers are advanced by the asm */
+         : "r"(block+64) /* end pointer, evaluated before the asm runs */
+           XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3", /* macro defined outside this view; presumably emits a clobber list for the xmm registers used above -- confirm */
+                             "%xmm4", "%xmm5", "%xmm6", "%xmm7")
+     ); /* NOTE(review): the asm stores through %0/%1 but no "memory" clobber is visible here -- confirm whether one is wanted */
+ }
+ #endif /* HAVE_INLINE_ASM */
 +av_cold void ff_dct_encode_init_x86(MpegEncContext *s)
  {
      const int dct_algo = s->avctx->dct_algo;
      int i;