avcodec/nvenc: surface allocation reduction

author Ben Chang <benc@nvidia.com>

Tue, 25 Apr 2017 21:57:56 +0000 (14:57 -0700)

committer Timo Rothenpieler <timo@rothenpieler.org>

Wed, 26 Apr 2017 19:57:54 +0000 (21:57 +0200)
author Ben Chang <benc@nvidia.com>
Tue, 25 Apr 2017 21:57:56 +0000 (14:57 -0700)
committer Timo Rothenpieler <timo@rothenpieler.org>
Wed, 26 Apr 2017 19:57:54 +0000 (21:57 +0200)
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c

index cf05455..00766c2 100644 (file)
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -644,16 +644,34 @@ static void nvenc_override_rate_control(AVCodecContext *avctx)
  static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
  {
      NvencContext *ctx = avctx->priv_data;
-    int nb_surfaces = 0;
+    // default minimum of 4 surfaces
+    // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
+    // another multiply by 2 to avoid blocking next PBB group
+    int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2);
  
+    // lookahead enabled
      if (ctx->rc_lookahead > 0) {
-        nb_surfaces = ctx->rc_lookahead + ((ctx->encode_config.frameIntervalP > 0) ? ctx->encode_config.frameIntervalP : 0) + 1 + 4;
-        if (ctx->nb_surfaces < nb_surfaces) {
+        // +1 is to account for lkd_bound calculation later
+        // +4 is to allow sufficient pipelining with lookahead
+        nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4));
+        if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0)
+        {
              av_log(avctx, AV_LOG_WARNING,
                     "Defined rc_lookahead requires more surfaces, "
                     "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
-            ctx->nb_surfaces = nb_surfaces;
          }
+        ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
+    } else {
+        if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0)
+        {
+            av_log(avctx, AV_LOG_WARNING,
+                   "Defined b-frame requires more surfaces, "
+                   "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
+            ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces);
+        }
+        else if (ctx->nb_surfaces <= 0)
+            ctx->nb_surfaces = nb_surfaces;
+        // otherwise use user specified value
      }
  
      ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
@@ -1086,6 +1104,7 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
      NvencContext *ctx = avctx->priv_data;
      NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
      NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+    NvencSurface* tmp_surface = &ctx->surfaces[idx];
  
      NVENCSTATUS nv_status;
      NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
@@ -1121,8 +1140,6 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
          ctx->surfaces[idx].height = allocSurf.height;
      }
  
-    ctx->surfaces[idx].lockCount = 0;
-
      /* 1MB is large enough to hold most output frames.
      * NVENC increases this automatically if it is not enough. */
      allocOut.size = 1024 * 1024;
@@ -1141,6 +1158,8 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
      ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
      ctx->surfaces[idx].size = allocOut.size;
  
+    av_fifo_generic_write(ctx->unused_surface_queue, &tmp_surface, sizeof(tmp_surface), NULL);
+
      return 0;
  }
  
@@ -1156,6 +1175,11 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
      ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
      if (!ctx->timestamp_list)
          return AVERROR(ENOMEM);
+
+    ctx->unused_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
+    if (!ctx->unused_surface_queue)
+        return AVERROR(ENOMEM);
+
      ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
      if (!ctx->output_surface_queue)
          return AVERROR(ENOMEM);
@@ -1222,6 +1246,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
      av_fifo_freep(&ctx->timestamp_list);
      av_fifo_freep(&ctx->output_surface_ready_queue);
      av_fifo_freep(&ctx->output_surface_queue);
+    av_fifo_freep(&ctx->unused_surface_queue);
  
      if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) {
          for (i = 0; i < ctx->nb_surfaces; ++i) {
@@ -1305,16 +1330,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
  
  static NvencSurface *get_free_frame(NvencContext *ctx)
  {
-    int i;
+    NvencSurface *tmp_surf;
  
-    for (i = 0; i < ctx->nb_surfaces; i++) {
-        if (!ctx->surfaces[i].lockCount) {
-            ctx->surfaces[i].lockCount = 1;
-            return &ctx->surfaces[i];
-        }
-    }
+    if (!(av_fifo_size(ctx->unused_surface_queue) > 0))
+        // queue empty
+        return NULL;
  
-    return NULL;
+    av_fifo_generic_read(ctx->unused_surface_queue, &tmp_surf, sizeof(tmp_surf), NULL);
+    return tmp_surf;
  }
  
  static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
@@ -1712,7 +1735,6 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
          }
  
          if (res) {
-            inSurf->lockCount = 0;
              return res;
          }
  
@@ -1790,8 +1812,7 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
          if (res)
              return res;
  
-        av_assert0(tmpoutsurf->lockCount);
-        tmpoutsurf->lockCount--;
+        av_fifo_generic_write(ctx->unused_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
  
          *got_packet = 1;
      } else {
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h

index 7dec5cc..763647b 100644 (file)
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -44,7 +44,6 @@ typedef struct NvencSurface
      NV_ENC_OUTPUT_PTR output_surface;
      NV_ENC_BUFFER_FORMAT format;
      int size;
-    int lockCount;
  } NvencSurface;
  
  typedef struct NvencDynLoadFunctions
@@ -110,6 +109,7 @@ typedef struct NvencContext
      int nb_surfaces;
      NvencSurface *surfaces;
  
+    AVFifoBuffer *unused_surface_queue;
      AVFifoBuffer *output_surface_queue;
      AVFifoBuffer *output_surface_ready_queue;
      AVFifoBuffer *timestamp_list;
diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c

index 2c55b60..8d44b1f 100644 (file)
--- a/libavcodec/nvenc_h264.c
+++ b/libavcodec/nvenc_h264.c
@@ -79,8 +79,8 @@ static const AVOption options[] = {
                                                              0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
      { "vbr_2pass",    "Multi-pass variable bitrate mode",   0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR },           0, 0, VE, "rc" },
      { "rc-lookahead", "Number of frames to look ahead for rate-control",
-                                                            OFFSET(rc_lookahead), AV_OPT_TYPE_INT,   { .i64 = -1 }, -1, INT_MAX, VE },
-    { "surfaces",     "Number of concurrent surfaces",      OFFSET(nb_surfaces),  AV_OPT_TYPE_INT,   { .i64 = 32 },  0, MAX_REGISTERED_FRAMES, VE },
+                                                            OFFSET(rc_lookahead), AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, INT_MAX, VE },
+    { "surfaces",     "Number of concurrent surfaces",      OFFSET(nb_surfaces),  AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
      { "cbr",          "Use cbr encoding mode",              OFFSET(cbr),          AV_OPT_TYPE_BOOL,  { .i64 = 0 },   0, 1, VE },
      { "2pass",        "Use 2pass encoding mode",            OFFSET(twopass),      AV_OPT_TYPE_BOOL,  { .i64 = -1 }, -1, 1, VE },
      { "gpu",          "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c

index c32ba42..6d6750a 100644 (file)
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -78,8 +78,8 @@ static const AVOption options[] = {
                                                              0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
      { "vbr_2pass",    "Multi-pass variable bitrate mode",   0,                    AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR },           0, 0, VE, "rc" },
      { "rc-lookahead", "Number of frames to look ahead for rate-control",
-                                                            OFFSET(rc_lookahead), AV_OPT_TYPE_INT,   { .i64 = -1 }, -1, INT_MAX, VE },
-    { "surfaces",     "Number of concurrent surfaces",      OFFSET(nb_surfaces),  AV_OPT_TYPE_INT,   { .i64 = 32 },  0, MAX_REGISTERED_FRAMES, VE },
+                                                            OFFSET(rc_lookahead), AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, INT_MAX, VE },
+    { "surfaces",     "Number of concurrent surfaces",      OFFSET(nb_surfaces),  AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
      { "cbr",          "Use cbr encoding mode",              OFFSET(cbr),          AV_OPT_TYPE_BOOL,  { .i64 = 0 },   0, 1, VE },
      { "2pass",        "Use 2pass encoding mode",            OFFSET(twopass),      AV_OPT_TYPE_BOOL,  { .i64 = -1 }, -1, 1, VE },
      { "gpu",          "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",
author	Ben Chang <benc@nvidia.com>
	Tue, 25 Apr 2017 21:57:56 +0000 (14:57 -0700)
committer	Timo Rothenpieler <timo@rothenpieler.org>
	Wed, 26 Apr 2017 19:57:54 +0000 (21:57 +0200)
libavcodec/nvenc.c		patch \| blob \| history
libavcodec/nvenc.h		patch \| blob \| history
libavcodec/nvenc_h264.c		patch \| blob \| history
libavcodec/nvenc_hevc.c		patch \| blob \| history