SSSE3 versions of vp8 width4 bilinear MC functions

author Jason Garrett-Glaser <darkshikari@gmail.com>
Sat, 3 Jul 2010 00:48:12 +0000 (00:48 +0000)
committer Jason Garrett-Glaser <darkshikari@gmail.com>
Sat, 3 Jul 2010 00:48:12 +0000 (00:48 +0000)
diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c

index 6247da9..698d394 100644 (file)
--- a/libavcodec/x86/vp8dsp-init.c
+++ b/libavcodec/x86/vp8dsp-init.c
@@ -85,6 +85,12 @@ extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride,
  extern void ff_put_vp8_bilinear8_h_sse2  (uint8_t *dst, int dststride,
                                            uint8_t *src, int srcstride,
                                            int height, int mx, int my);
+extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, int dststride,
+                                          uint8_t *src, int srcstride,
+                                          int height, int mx, int my);
+extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
+                                          uint8_t *src, int srcstride,
+                                          int height, int mx, int my);
  
  extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
                                            uint8_t *src, int srcstride,
@@ -92,13 +98,14 @@ extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
  extern void ff_put_vp8_bilinear8_v_sse2  (uint8_t *dst, int dststride,
                                            uint8_t *src, int srcstride,
                                            int height, int mx, int my);
-extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride,
+extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, int dststride,
                                            uint8_t *src, int srcstride,
                                            int height, int mx, int my);
-extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
+extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride,
                                            uint8_t *src, int srcstride,
                                            int height, int mx, int my);
  
+
  extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride,
                                      uint8_t *src, int srcstride,
                                      int height, int mx, int my);
@@ -207,6 +214,7 @@ HVBILIN(mmxext, 8,  8, 16)
  HVBILIN(mmxext, 8, 16, 16)
  HVBILIN(sse2,   8,  8, 16)
  HVBILIN(sse2,   8, 16, 16)
+HVBILIN(ssse3,  8,  4,  8)
  HVBILIN(ssse3,  8,  8, 16)
  HVBILIN(ssse3,  8, 16, 16)
  
@@ -284,6 +292,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
          VP8_MC_FUNC(2, 4, ssse3);
          VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
          VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
+        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
      }
  
      if (mm_flags & FF_MM_SSE4) {
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm

index 3ac9ca9..2c3eee4 100644 (file)
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -770,7 +770,8 @@ FILTER_BILINEAR mmxext, 4, 0
  INIT_XMM
  FILTER_BILINEAR   sse2, 8, 7
  
-cglobal put_vp8_bilinear8_v_ssse3, 7,7,5
+%macro FILTER_BILINEAR_SSSE3 1
+cglobal put_vp8_bilinear%1_v_ssse3, 7,7
      shl      r6d, 4
  %ifdef PIC
      lea      r11, [bilinear_filter_vb_m]
@@ -789,9 +790,16 @@ cglobal put_vp8_bilinear8_v_ssse3, 7,7,5
      psraw     m1, 2
      pavgw     m0, m4
      pavgw     m1, m4
+%if mmsize==8
+    packuswb  m0, m0
+    packuswb  m1, m1
+    movh [r0+r1*0], m0
+    movh [r0+r1*1], m1
+%else
      packuswb  m0, m1
      movh   [r0+r1*0], m0
      movhps [r0+r1*1], m0
+%endif
  
      lea       r0, [r0+r1*2]
      lea       r2, [r2+r3*2]
@@ -799,7 +807,7 @@ cglobal put_vp8_bilinear8_v_ssse3, 7,7,5
      jg .nextrow
      REP_RET
  
-cglobal put_vp8_bilinear8_h_ssse3, 7,7,5
+cglobal put_vp8_bilinear%1_h_ssse3, 7,7
      shl      r5d, 4
  %ifdef PIC
      lea      r11, [bilinear_filter_vb_m]
@@ -818,15 +826,28 @@ cglobal put_vp8_bilinear8_h_ssse3, 7,7,5
      psraw     m1, 2
      pavgw     m0, m4
      pavgw     m1, m4
+%if mmsize==8
+    packuswb  m0, m0
+    packuswb  m1, m1
+    movh [r0+r1*0], m0
+    movh [r0+r1*1], m1
+%else
      packuswb  m0, m1
      movh   [r0+r1*0], m0
      movhps [r0+r1*1], m0
+%endif
  
      lea       r0, [r0+r1*2]
      lea       r2, [r2+r3*2]
      sub       r4, 2
      jg .nextrow
      REP_RET
+%endmacro
+
+INIT_MMX
+FILTER_BILINEAR_SSSE3 4
+INIT_XMM
+FILTER_BILINEAR_SSSE3 8
  
  cglobal put_vp8_pixels8_mmx, 5,5
  .nextrow:
author	Jason Garrett-Glaser <darkshikari@gmail.com>
	Sat, 3 Jul 2010 00:48:12 +0000 (00:48 +0000)
committer	Jason Garrett-Glaser <darkshikari@gmail.com>
	Sat, 3 Jul 2010 00:48:12 +0000 (00:48 +0000)
libavcodec/x86/vp8dsp-init.c		patch | blob | history
libavcodec/x86/vp8dsp.asm		patch | blob | history