ARM: NEON avg_pixels8 and avg_h264_qpel8_mc00

author Måns Rullgård <mans@mansr.com>

Wed, 7 Oct 2009 21:35:19 +0000 (21:35 +0000)

committer Måns Rullgård <mans@mansr.com>

Wed, 7 Oct 2009 21:35:19 +0000 (21:35 +0000)
author Måns Rullgård <mans@mansr.com>
Wed, 7 Oct 2009 21:35:19 +0000 (21:35 +0000)
committer Måns Rullgård <mans@mansr.com>
Wed, 7 Oct 2009 21:35:19 +0000 (21:35 +0000)
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c

index a1d4980..ef706b2 100644 (file)
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -49,6 +49,7 @@ void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
  void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
  
  void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, int, int);
+void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, int, int);
  
  void ff_add_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
  void ff_put_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
@@ -90,6 +91,8 @@ void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
  
  void ff_avg_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
  
+void ff_avg_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
+
  void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
  void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
  
@@ -230,6 +233,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
      c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
  
      c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
+    c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
  
      c->add_pixels_clamped = ff_add_pixels_clamped_neon;
      c->put_pixels_clamped = ff_put_pixels_clamped_neon;
@@ -278,6 +282,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
  
          c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon;
  
+        c->avg_h264_qpel_pixels_tab[1][ 0] = ff_avg_h264_qpel8_mc00_neon;
+
          c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
          c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
          c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S

index 56e7cd3..ef5e8c7 100644 (file)
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -139,7 +139,7 @@
          bx              lr
          .endm
  
-        .macro pixels8
+        .macro pixels8 avg=0
  1:      vld1.64         {d0}, [r1], r2
          vld1.64         {d1}, [r1], r2
          vld1.64         {d2}, [r1], r2
@@ -148,6 +148,17 @@
          pld             [r1]
          pld             [r1, r2]
          pld             [r1, r2, lsl #1]
+.if \avg
+        vld1.64         {d4}, [r0,:64], r2
+        vrhadd.u8       d0,  d0,  d4
+        vld1.64         {d5}, [r0,:64], r2
+        vrhadd.u8       d1,  d1,  d5
+        vld1.64         {d6}, [r0,:64], r2
+        vrhadd.u8       d2,  d2,  d6
+        vld1.64         {d7}, [r0,:64], r2
+        vrhadd.u8       d3,  d3,  d7
+        sub             r0,  r0,  r2,  lsl #2
+.endif
          subs            r3,  r3,  #4
          vst1.64         {d0}, [r0,:64], r2
          vst1.64         {d1}, [r0,:64], r2
@@ -261,6 +272,12 @@ function ff_put_h264_qpel8_mc00_neon, export=1
          pixfunc2 put_ pixels8_y2,   _no_rnd, vhadd.u8
          pixfunc2 put_ pixels8_xy2,  _no_rnd, vshrn.u16, 1
  
+function ff_avg_h264_qpel8_mc00_neon, export=1
+        mov             r3,  #8
+        .endfunc
+
+        pixfunc  avg_ pixels8,, 1
+
  function ff_put_pixels_clamped_neon, export=1
          vld1.64         {d16-d19}, [r0,:128]!
          vqmovun.s16     d0, q8
author	Måns Rullgård <mans@mansr.com>
	Wed, 7 Oct 2009 21:35:19 +0000 (21:35 +0000)
committer	Måns Rullgård <mans@mansr.com>
	Wed, 7 Oct 2009 21:35:19 +0000 (21:35 +0000)
libavcodec/arm/dsputil_init_neon.c		patch | blob | history
libavcodec/arm/dsputil_neon.S		patch | blob | history