static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
- int nb_surfaces = 0;
+ // default minimum of 4 surfaces
+ // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
+ // another multiply by 2 to avoid blocking next PBB group
+ int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2);
+ // lookahead enabled
if (ctx->rc_lookahead > 0) {
- nb_surfaces = ctx->rc_lookahead + ((ctx->encode_config.frameIntervalP > 0) ? ctx->encode_config.frameIntervalP : 0) + 1 + 4;
- if (ctx->nb_surfaces < nb_surfaces) {
+ // +1 is to account for lkd_bound calculation later
+ // +4 is to allow sufficient pipelining with lookahead
+ nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4));
+ if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0)
+ {
av_log(avctx, AV_LOG_WARNING,
"Defined rc_lookahead requires more surfaces, "
"increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
- ctx->nb_surfaces = nb_surfaces;
}
+ ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
+ } else {
+ if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0)
+ {
+ av_log(avctx, AV_LOG_WARNING,
+ "Defined b-frame requires more surfaces, "
+ "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
+ ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces);
+ }
+ else if (ctx->nb_surfaces <= 0)
+ ctx->nb_surfaces = nb_surfaces;
+ // otherwise use user specified value
}
ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+ NvencSurface* tmp_surface = &ctx->surfaces[idx];
NVENCSTATUS nv_status;
NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
ctx->surfaces[idx].height = allocSurf.height;
}
- ctx->surfaces[idx].lockCount = 0;
-
/* 1MB is large enough to hold most output frames.
* NVENC increases this automatically if it is not enough. */
allocOut.size = 1024 * 1024;
ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
ctx->surfaces[idx].size = allocOut.size;
+ av_fifo_generic_write(ctx->unused_surface_queue, &tmp_surface, sizeof(tmp_surface), NULL);
+
return 0;
}
ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
if (!ctx->timestamp_list)
return AVERROR(ENOMEM);
+
+ ctx->unused_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
+ if (!ctx->unused_surface_queue)
+ return AVERROR(ENOMEM);
+
ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
if (!ctx->output_surface_queue)
return AVERROR(ENOMEM);
av_fifo_freep(&ctx->timestamp_list);
av_fifo_freep(&ctx->output_surface_ready_queue);
av_fifo_freep(&ctx->output_surface_queue);
+ av_fifo_freep(&ctx->unused_surface_queue);
if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) {
for (i = 0; i < ctx->nb_surfaces; ++i) {
/*
 * Hand out a surface that is currently not in use, or NULL if there is none.
 *
 * Lines marked '+' (new behaviour): if ctx->unused_surface_queue holds no
 * data, return NULL; otherwise read one NvencSurface pointer out of that
 * FIFO and return it.
 *
 * Lines marked '-' (replaced behaviour): walk ctx->surfaces[0..nb_surfaces),
 * pick the first entry whose lockCount is 0, set its lockCount to 1 and
 * return its address; return NULL when no such entry exists.
 */
static NvencSurface *get_free_frame(NvencContext *ctx)
{
- int i;
+ NvencSurface *tmp_surf;
- for (i = 0; i < ctx->nb_surfaces; i++) {
- if (!ctx->surfaces[i].lockCount) {
- ctx->surfaces[i].lockCount = 1;
- return &ctx->surfaces[i];
- }
- }
+ if (!(av_fifo_size(ctx->unused_surface_queue) > 0))
+ // queue empty
+ return NULL;
- return NULL;
+ av_fifo_generic_read(ctx->unused_surface_queue, &tmp_surf, sizeof(tmp_surf), NULL);
+ return tmp_surf;
}
static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
}
if (res) {
- inSurf->lockCount = 0;
return res;
}
if (res)
return res;
- av_assert0(tmpoutsurf->lockCount);
- tmpoutsurf->lockCount--;
+ av_fifo_generic_write(ctx->unused_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
*got_packet = 1;
} else {
NV_ENC_OUTPUT_PTR output_surface;
NV_ENC_BUFFER_FORMAT format;
int size;
- int lockCount;
} NvencSurface;
typedef struct NvencDynLoadFunctions
int nb_surfaces;
NvencSurface *surfaces;
+ AVFifoBuffer *unused_surface_queue;
AVFifoBuffer *output_surface_queue;
AVFifoBuffer *output_surface_ready_queue;
AVFifoBuffer *timestamp_list;
0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
{ "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" },
{ "rc-lookahead", "Number of frames to look ahead for rate-control",
- OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
- { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, MAX_REGISTERED_FRAMES, VE },
+ OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+ { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
{ "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
{ "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
{ "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",
0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
{ "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" },
{ "rc-lookahead", "Number of frames to look ahead for rate-control",
- OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
- { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, MAX_REGISTERED_FRAMES, VE },
+ OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+ { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
{ "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
{ "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
{ "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",