VP8: much faster DC transform handling

author Jason Garrett-Glaser <darkshikari@gmail.com>
Mon, 2 Aug 2010 20:57:03 +0000 (20:57 +0000)
committer Jason Garrett-Glaser <darkshikari@gmail.com>
Mon, 2 Aug 2010 20:57:03 +0000 (20:57 +0000)
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c

index 6519241..1069506 100644 (file)
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -868,6 +868,7 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
      int i, x, y, luma_start = 0, luma_ctx = 3;
      int nnz_pred, nnz, nnz_total = 0;
      int segment = s->segment;
+    int block_dc = 0;
  
      if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
          nnz_pred = t_nnz[8] + l_nnz[8];
@@ -876,8 +877,14 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
          nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
                                    s->qmat[segment].luma_dc_qmul);
          l_nnz[8] = t_nnz[8] = !!nnz;
-        nnz_total += nnz;
-        s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
+        if (nnz) {
+            nnz_total += nnz;
+            block_dc = 1;
+            if (nnz == 1)
+                s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
+            else
+                s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
+        }
          luma_start = 1;
          luma_ctx = 0;
      }
@@ -888,8 +895,8 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
              nnz_pred = l_nnz[y] + t_nnz[x];
              nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
                                        nnz_pred, s->qmat[segment].luma_qmul);
-            // nnz+luma_start may be one more than the actual last index, but we don't care
-            s->non_zero_count_cache[y][x] = nnz + luma_start;
+            // nnz+block_dc may be one more than the actual last index, but we don't care
+            s->non_zero_count_cache[y][x] = nnz + block_dc;
              t_nnz[x] = l_nnz[y] = !!nnz;
              nnz_total += nnz;
          }
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c

index 5f51248..b8cf0b2 100644 (file)
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -51,13 +51,25 @@ static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
          dc[i*4+2] = 0;
          dc[i*4+3] = 0;
  
-        *block[i][0] = (t0 + t1) >> 3;
-        *block[i][1] = (t3 + t2) >> 3;
-        *block[i][2] = (t0 - t1) >> 3;
-        *block[i][3] = (t3 - t2) >> 3;
+        block[i][0][0] = (t0 + t1) >> 3;
+        block[i][1][0] = (t3 + t2) >> 3;
+        block[i][2][0] = (t0 - t1) >> 3;
+        block[i][3][0] = (t3 - t2) >> 3;
      }
  }
  
+static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16])
+{   /* DC-only variant of the luma DC WHT: reads dc[0] alone and fills coefficient 0 of all 16 blocks with one value. */
+    int i, val = (dc[0] + 3) >> 3; /* same >> 3 scaling as vp8_luma_dc_wht_c; the +3 presumably mirrors its rounding term (not visible in this hunk -- confirm) */
+    dc[0] = 0; /* clear the consumed input; dc[1..15] are neither read nor written here */
+
+    for (i = 0; i < 4; i++) { /* i selects the first index of block[4][4][16]; the four stores cover the second index */
+        block[i][0][0] = val; /* element 0 of each 16-entry block receives the shared value */
+        block[i][1][0] = val;
+        block[i][2][0] = val;
+        block[i][3][0] = val;
+    } /* decode_mb_coeffs selects this variant when decode_block_coeffs returns nnz == 1 for the DC block */
+}
  
  #define MUL_20091(a) ((((a)*20091) >> 16) + (a))
  #define MUL_35468(a)  (((a)*35468) >> 16)
@@ -480,6 +492,7 @@ VP8_BILINEAR(4)
  av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
  {
      dsp->vp8_luma_dc_wht    = vp8_luma_dc_wht_c;
+    dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c;
      dsp->vp8_idct_add       = vp8_idct_add_c;
      dsp->vp8_idct_dc_add    = vp8_idct_dc_add_c;
      dsp->vp8_idct_dc_add4y  = vp8_idct_dc_add4y_c;
diff --git a/libavcodec/vp8dsp.h b/libavcodec/vp8dsp.h

index 47b1a90..ee5c7ec 100644 (file)
--- a/libavcodec/vp8dsp.h
+++ b/libavcodec/vp8dsp.h
@@ -31,6 +31,7 @@ typedef void (*vp8_mc_func)(uint8_t *dst/*align 8*/, int dstStride, uint8_t *src
  
  typedef struct VP8DSPContext {
      void (*vp8_luma_dc_wht)(DCTELEM block[4][4][16], DCTELEM dc[16]);
+    void (*vp8_luma_dc_wht_dc)(DCTELEM block[4][4][16], DCTELEM dc[16]);
      void (*vp8_idct_add)(uint8_t *dst, DCTELEM block[16], int stride);
      void (*vp8_idct_dc_add)(uint8_t *dst, DCTELEM block[16], int stride);
      void (*vp8_idct_dc_add4y)(uint8_t *dst, DCTELEM block[4][16], int stride);
author	Jason Garrett-Glaser <darkshikari@gmail.com>
	Mon, 2 Aug 2010 20:57:03 +0000 (20:57 +0000)
committer	Jason Garrett-Glaser <darkshikari@gmail.com>
	Mon, 2 Aug 2010 20:57:03 +0000 (20:57 +0000)
libavcodec/vp8.c		patch \| blob \| history
libavcodec/vp8dsp.c		patch \| blob \| history
libavcodec/vp8dsp.h		patch \| blob \| history