From c5b42f4a80b6668cb10d87b065f4e95bea6311c0 Mon Sep 17 00:00:00 2001
From: NVIDIA Corporation <>
Date: Tue, 20 Jan 2009 09:28:36 +0000
Subject: [PATCH] Add VDPAU hardware accelerated decoding for WMV3 and VC1
 which can be used by video players.

Original patch by NVIDIA corporation.

Originally committed as revision 16699 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 configure                   |  2 ++
 libavcodec/Makefile         |  2 ++
 libavcodec/allcodecs.c      |  2 ++
 libavcodec/imgconvert.c     |  6 ++++
 libavcodec/vc1.c            | 48 +++++++++++++++++++++++++++++++
 libavcodec/vdpau.h          |  1 +
 libavcodec/vdpau_internal.h |  3 ++
 libavcodec/vdpauvideo.c     | 70 +++++++++++++++++++++++++++++++++++++++++++++
 libavutil/avutil.h          |  2 ++
 9 files changed, 136 insertions(+)

diff --git a/configure b/configure
index b80bb3843..54ecbfe12 100755
--- a/configure
+++ b/configure
@@ -1016,6 +1016,7 @@ svq3_decoder_suggest="zlib"
 tiff_decoder_suggest="zlib"
 tiff_encoder_suggest="zlib"
 tscc_decoder_select="zlib"
+vc1_vdpau_decoder_deps="vdpau"
 vorbis_decoder_select="fft mdct"
 vorbis_encoder_select="fft mdct"
 wmav1_decoder_select="fft mdct"
@@ -1024,6 +1025,7 @@ wmav2_decoder_select="fft mdct"
 wmav2_encoder_select="fft mdct"
 wmv1_encoder_select="aandct"
 wmv2_encoder_select="aandct"
+wmv3_vdpau_decoder_deps="vdpau"
 zlib_decoder_select="zlib"
 zlib_encoder_select="zlib"
 zmbv_decoder_select="zlib"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fa8b71bc7..9c487fb1a 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -218,6 +218,7 @@ OBJS-$(CONFIG_TXD_DECODER)             += txd.o s3tc.o
 OBJS-$(CONFIG_ULTI_DECODER)            += ulti.o
 OBJS-$(CONFIG_VB_DECODER)              += vb.o
 OBJS-$(CONFIG_VC1_DECODER)             += vc1.o vc1data.o vc1dsp.o msmpeg4data.o h263dec.o h263.o intrax8.o intrax8dsp.o error_resilience.o mpegvideo.o
+OBJS-$(CONFIG_VC1_VDPAU_DECODER)       += vdpauvideo.o vc1.o vc1data.o vc1dsp.o msmpeg4data.o h263dec.o h263.o intrax8.o intrax8dsp.o error_resilience.o mpegvideo.o
 OBJS-$(CONFIG_VCR1_DECODER)            += vcr1.o
 OBJS-$(CONFIG_VCR1_ENCODER)            += vcr1.o
 OBJS-$(CONFIG_VMDAUDIO_DECODER)        += vmdav.o
@@ -241,6 +242,7 @@ OBJS-$(CONFIG_WMV1_ENCODER)            += mpegvideo_enc.o motion_est.o ratecontr
 OBJS-$(CONFIG_WMV2_DECODER)            += wmv2dec.o wmv2.o msmpeg4.o msmpeg4data.o h263dec.o h263.o intrax8.o intrax8dsp.o mpeg12data.o mpegvideo.o error_resilience.o
 OBJS-$(CONFIG_WMV2_ENCODER)            += wmv2enc.o wmv2.o msmpeg4.o msmpeg4data.o mpegvideo_enc.o motion_est.o ratecontrol.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
 OBJS-$(CONFIG_WMV3_DECODER)            += vc1.o vc1data.o vc1dsp.o msmpeg4data.o h263dec.o h263.o intrax8.o intrax8dsp.o error_resilience.o mpegvideo.o
+OBJS-$(CONFIG_WMV3_VDPAU_DECODER)      += vdpauvideo.o vc1.o vc1data.o vc1dsp.o msmpeg4data.o h263dec.o h263.o intrax8.o intrax8dsp.o error_resilience.o mpegvideo.o
 OBJS-$(CONFIG_WNV1_DECODER)            += wnv1.o
 OBJS-$(CONFIG_WS_SND1_DECODER)         += ws-snd1.o
 OBJS-$(CONFIG_XAN_DPCM_DECODER)        += dpcm.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 9121a6f7b..f369c0a20 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -157,6 +157,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER (ULTI, ulti);
     REGISTER_DECODER (VB, vb);
     REGISTER_DECODER (VC1, vc1);
+    REGISTER_DECODER (VC1_VDPAU, vc1_vdpau);
     REGISTER_DECODER (VCR1, vcr1);
     REGISTER_DECODER (VMDVIDEO, vmdvideo);
     REGISTER_DECODER (VMNC, vmnc);
@@ -169,6 +170,7 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC  (WMV1, wmv1);
     REGISTER_ENCDEC  (WMV2, wmv2);
     REGISTER_DECODER (WMV3, wmv3);
+    REGISTER_DECODER (WMV3_VDPAU, wmv3_vdpau);
     REGISTER_DECODER (WNV1, wnv1);
     REGISTER_DECODER (XAN_WC3, xan_wc3);
     REGISTER_DECODER (XL, xl);
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 7c62b66db..17c8473c3 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -276,6 +276,12 @@ static const PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
     [PIX_FMT_VDPAU_H264] = {
         .name = "vdpau_h264",
     },
+    [PIX_FMT_VDPAU_WMV3] = {
+        .name = "vdpau_wmv3",
+    },
+    [PIX_FMT_VDPAU_VC1] = {
+        .name = "vdpau_vc1",
+    },
     [PIX_FMT_UYYVYY411] = {
         .name = "uyyvyy411",
         .nb_channels = 1,
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index dee0adcd9..83022b49c 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -35,6 +35,7 @@
 #include "unary.h"
 #include "simple_idct.h"
 #include "mathops.h"
+#include "vdpau_internal.h"
 
 #undef NDEBUG
 #include <assert.h>
@@ -4130,6 +4131,7 @@ static int vc1_decode_frame(AVCodecContext *avctx,
     MpegEncContext *s = &v->s;
     AVFrame *pict = data;
     uint8_t *buf2 = NULL;
+    const uint8_t *buf_vdpau = buf;
 
     /* no supplementary picture */
     if (buf_size == 0) {
@@ -4151,6 +4153,13 @@ static int vc1_decode_frame(AVCodecContext *avctx,
         s->current_picture_ptr= &s->picture[i];
     }
 
+    if (s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
+        if (v->profile < PROFILE_ADVANCED)
+            avctx->pix_fmt = PIX_FMT_VDPAU_WMV3;
+        else
+            avctx->pix_fmt = PIX_FMT_VDPAU_VC1;
+    }
+
     //for advanced profile we may need to parse and unescape data
     if (avctx->codec_id == CODEC_ID_VC1) {
         int buf_size2 = 0;
@@ -4167,6 +4176,8 @@ static int vc1_decode_frame(AVCodecContext *avctx,
                 if(size <= 0) continue;
                 switch(AV_RB32(start)){
                 case VC1_CODE_FRAME:
+                    if (s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+                        buf_vdpau = start;
                     buf_size2 = vc1_unescape_buffer(start + 4, size, buf2);
                     break;
                 case VC1_CODE_ENTRYPOINT: /* it should be before frame data */
@@ -4255,6 +4266,10 @@ static int vc1_decode_frame(AVCodecContext *avctx,
     s->me.qpel_put= s->dsp.put_qpel_pixels_tab;
     s->me.qpel_avg= s->dsp.avg_qpel_pixels_tab;
 
+    if ((CONFIG_VC1_VDPAU_DECODER || CONFIG_WMV3_VDPAU_DECODER)
+        &&s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+        ff_vdpau_vc1_decode_picture(s, buf_vdpau, (buf + buf_size) - buf_vdpau);
+    else {
     ff_er_frame_start(s);
 
     v->bits = buf_size * 8;
@@ -4263,6 +4278,7 @@ static int vc1_decode_frame(AVCodecContext *avctx,
 //  if(get_bits_count(&s->gb) > buf_size * 8)
 //      return -1;
     ff_er_frame_end(s);
+    }
 
     MPV_frame_end(s);
 
@@ -4336,3 +4352,35 @@ AVCodec wmv3_decoder = {
     NULL,
     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9"),
 };
+
+#if CONFIG_WMV3_VDPAU_DECODER
+AVCodec wmv3_vdpau_decoder = {
+    "wmv3_vdpau",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_WMV3,
+    sizeof(VC1Context),
+    vc1_decode_init,
+    NULL,
+    vc1_decode_end,
+    vc1_decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    NULL,
+    .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 VDPAU"),
+};
+#endif
+
+#if CONFIG_VC1_VDPAU_DECODER
+AVCodec vc1_vdpau_decoder = {
+    "vc1_vdpau",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VC1,
+    sizeof(VC1Context),
+    vc1_decode_init,
+    NULL,
+    vc1_decode_end,
+    vc1_decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    NULL,
+    .long_name = NULL_IF_CONFIG_SMALL("SMPTE VC-1 VDPAU"),
+};
+#endif
diff --git a/libavcodec/vdpau.h b/libavcodec/vdpau.h
index a28ad7fa8..3d74fdd47 100644
--- a/libavcodec/vdpau.h
+++ b/libavcodec/vdpau.h
@@ -72,6 +72,7 @@ struct vdpau_render_state {
     union VdpPictureInfo {
         VdpPictureInfoMPEG1Or2 mpeg;
         VdpPictureInfoH264     h264;
+        VdpPictureInfoVC1       vc1;
     } info;
 
     /** Describe size/location of the compressed video data. */
diff --git a/libavcodec/vdpau_internal.h b/libavcodec/vdpau_internal.h
index e957037a8..ce0e00a38 100644
--- a/libavcodec/vdpau_internal.h
+++ b/libavcodec/vdpau_internal.h
@@ -36,4 +36,7 @@ void ff_vdpau_mpeg_picture_complete(MpegEncContext *s, const uint8_t *buf,
 void ff_vdpau_h264_set_reference_frames(MpegEncContext *s);
 void ff_vdpau_h264_picture_complete(MpegEncContext *s);
 
+void ff_vdpau_vc1_decode_picture(MpegEncContext *s, const uint8_t *buf,
+                                 int buf_size);
+
 #endif /* AVCODEC_VDPAU_INTERNAL_H */
diff --git a/libavcodec/vdpauvideo.c b/libavcodec/vdpauvideo.c
index a7c604d1e..8eb806a1a 100644
--- a/libavcodec/vdpauvideo.c
+++ b/libavcodec/vdpauvideo.c
@@ -24,6 +24,7 @@
 #include <limits.h>
 #include "avcodec.h"
 #include "h264.h"
+#include "vc1.h"
 
 #undef NDEBUG
 #include <assert.h>
@@ -232,4 +233,73 @@ void ff_vdpau_mpeg_picture_complete(MpegEncContext *s, const uint8_t *buf,
     render->bitstream_buffers_used               = 0;
 }
 
+void ff_vdpau_vc1_decode_picture(MpegEncContext *s, const uint8_t *buf,
+                                 int buf_size)
+{
+    VC1Context *v = s->avctx->priv_data;
+    struct vdpau_render_state * render, * last, * next;
+
+    render = (struct vdpau_render_state*)s->current_picture.data[0];
+    assert(render);
+
+    /*  fill LvPictureInfoVC1 struct */
+    render->info.vc1.frame_coding_mode  = v->fcm;
+    render->info.vc1.postprocflag       = v->postprocflag;
+    render->info.vc1.pulldown           = v->broadcast;
+    render->info.vc1.interlace          = v->interlace;
+    render->info.vc1.tfcntrflag         = v->tfcntrflag;
+    render->info.vc1.finterpflag        = v->finterpflag;
+    render->info.vc1.psf                = v->psf;
+    render->info.vc1.dquant             = v->dquant;
+    render->info.vc1.panscan_flag       = v->panscanflag;
+    render->info.vc1.refdist_flag       = v->refdist_flag;
+    render->info.vc1.quantizer          = v->quantizer_mode;
+    render->info.vc1.extended_mv        = v->extended_mv;
+    render->info.vc1.extended_dmv       = v->extended_dmv;
+    render->info.vc1.overlap            = v->overlap;
+    render->info.vc1.vstransform        = v->vstransform;
+    render->info.vc1.loopfilter         = v->s.loop_filter;
+    render->info.vc1.fastuvmc           = v->fastuvmc;
+    render->info.vc1.range_mapy_flag    = v->range_mapy_flag;
+    render->info.vc1.range_mapy         = v->range_mapy;
+    render->info.vc1.range_mapuv_flag   = v->range_mapuv_flag;
+    render->info.vc1.range_mapuv        = v->range_mapuv;
+    /* Specific to simple/main profile only */
+    render->info.vc1.multires           = v->multires;
+    render->info.vc1.syncmarker         = v->s.resync_marker;
+    render->info.vc1.rangered           = v->rangered;
+    render->info.vc1.maxbframes         = v->s.max_b_frames;
+
+    render->info.vc1.deblockEnable      = v->postprocflag & 1;
+    render->info.vc1.pquant             = v->pq;
+
+    render->info.vc1.forward_reference  = VDP_INVALID_HANDLE;
+    render->info.vc1.backward_reference = VDP_INVALID_HANDLE;
+
+    if (v->bi_type)
+        render->info.vc1.picture_type = 4;
+    else
+        render->info.vc1.picture_type = s->pict_type - 1 + s->pict_type / 3;
+
+    switch(s->pict_type){
+    case  FF_B_TYPE:
+        next = (struct vdpau_render_state*)s->next_picture.data[0];
+        assert(next);
+        render->info.vc1.backward_reference = next->surface;
+        // no break here, going to set forward prediction
+    case  FF_P_TYPE:
+        last = (struct vdpau_render_state*)s->last_picture.data[0];
+        if (!last) // FIXME: Does this test make sense?
+            last = render; // predict second field from the first
+        render->info.vc1.forward_reference = last->surface;
+    }
+
+    ff_vdpau_add_data_chunk(s, buf, buf_size);
+
+    render->info.vc1.slice_count          = 1;
+
+    ff_draw_horiz_band(s, 0, s->avctx->height);
+    render->bitstream_buffers_used        = 0;
+}
+
 /* @}*/
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index fdbce40fa..b02790b4f 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -124,6 +124,8 @@ enum PixelFormat {
     PIX_FMT_VDPAU_H264,///< H264 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
     PIX_FMT_VDPAU_MPEG1,///< MPEG1 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
     PIX_FMT_VDPAU_MPEG2,///< MPEG2 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+    PIX_FMT_VDPAU_WMV3,///< WMV3 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+    PIX_FMT_VDPAU_VC1, ///< VC1 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
     PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
 };
 
-- 
2.11.0