#include "internal.h"
#define NVENC_CAP 0x30
-#define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \
- rc == NV_ENC_PARAMS_RC_2_PASS_QUALITY || \
- rc == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP)
+#define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \
+ rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
+ rc == NV_ENC_PARAMS_RC_CBR_HQ)
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
AV_PIX_FMT_YUV420P,
return ret;
}
+/* Log the minimum Nvidia driver version required by this nvenc build.
+ * Called on load/version failures so the user sees an actionable hint;
+ * `level` is the av_log severity to emit at (callers pass AV_LOG_ERROR).
+ * Windows/Cygwin and Linux driver release numbering differ, hence the
+ * two version strings. */
+static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level)
+{
+#if defined(_WIN32) || defined(__CYGWIN__)
+ const char *minver = "378.66";
+#else
+ const char *minver = "378.13";
+#endif
+ av_log(avctx, level, "The minimum required Nvidia driver for nvenc is %s or newer\n", minver);
+}
+
static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
return ret;
ret = nvenc_load_functions(&dl_fn->nvenc_dl);
- if (ret < 0)
+ if (ret < 0) {
+ nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
return ret;
+ }
err = dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver);
if (err != NV_ENC_SUCCESS)
"Required: %d.%d Found: %d.%d\n",
NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION,
nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
+ nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
return AVERROR(ENOSYS);
}
return AVERROR(ENOSYS);
}
+ ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION);
+ if (ctx->weighted_pred > 0 && ret <= 0) {
+ av_log (avctx, AV_LOG_VERBOSE, "Weighted Prediction not supported\n");
+ return AVERROR(ENOSYS);
+ }
+
return 0;
}
return 0;
fail3:
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
ctx->nvencoder = NULL;
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
fail2:
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
ctx->cu_context_internal = NULL;
return AVERROR_BUG;
}
- if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+ if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
AVHWFramesContext *frames_ctx;
+ AVHWDeviceContext *hwdev_ctx;
AVCUDADeviceContext *device_hwctx;
int ret;
- if (!avctx->hw_frames_ctx)
+ if (avctx->hw_frames_ctx) {
+ frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+ device_hwctx = frames_ctx->device_ctx->hwctx;
+ } else if (avctx->hw_device_ctx) {
+ hwdev_ctx = (AVHWDeviceContext*)avctx->hw_device_ctx->data;
+ device_hwctx = hwdev_ctx->hwctx;
+ } else {
return AVERROR(EINVAL);
-
- frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
- device_hwctx = frames_ctx->device_ctx->hwctx;
+ }
ctx->cu_context = device_hwctx->cuda_ctx;
return;
}
/* fall through */
- case NV_ENC_PARAMS_RC_2_PASS_VBR:
+ case NV_ENC_PARAMS_RC_VBR_HQ:
case NV_ENC_PARAMS_RC_VBR:
set_vbr(avctx);
break;
case NV_ENC_PARAMS_RC_CBR:
- case NV_ENC_PARAMS_RC_2_PASS_QUALITY:
- case NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP:
+ case NV_ENC_PARAMS_RC_CBR_HQ:
+ case NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ:
break;
}
if (ctx->cbr) {
if (ctx->twopass) {
- ctx->rc = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
+ ctx->rc = NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ;
} else {
ctx->rc = NV_ENC_PARAMS_RC_CBR;
}
} else if (ctx->cqp >= 0) {
ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
} else if (ctx->twopass) {
- ctx->rc = NV_ENC_PARAMS_RC_2_PASS_VBR;
+ ctx->rc = NV_ENC_PARAMS_RC_VBR_HQ;
} else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
ctx->rc = NV_ENC_PARAMS_RC_VBR_MINQP;
}
}
+ if (ctx->rc >= 0 && ctx->rc & RC_MODE_DEPRECATED) {
+ av_log(avctx, AV_LOG_WARNING, "Specified rc mode is deprecated.\n");
+ av_log(avctx, AV_LOG_WARNING, "\tll_2pass_quality -> cbr_ld_hq\n");
+ av_log(avctx, AV_LOG_WARNING, "\tll_2pass_size -> cbr_hq\n");
+ av_log(avctx, AV_LOG_WARNING, "\tvbr_2pass -> vbr_hq\n");
+ av_log(avctx, AV_LOG_WARNING, "\tvbr_minqp -> (no replacement)\n");
+
+ ctx->rc &= ~RC_MODE_DEPRECATED;
+ }
+
if (ctx->flags & NVENC_LOSSLESS) {
set_lossless(avctx);
} else if (ctx->rc >= 0) {
ctx->encode_config.rcParams.zeroReorderDelay = 1;
if (ctx->quality)
- ctx->encode_config.rcParams.targetQuality = ctx->quality;
+ {
+ //convert from float to fixed point 8.8
+ int tmp_quality = (int)(ctx->quality * 256.0f);
+ ctx->encode_config.rcParams.targetQuality = (uint8_t)(tmp_quality >> 8);
+ ctx->encode_config.rcParams.targetQualityLSB = (uint8_t)(tmp_quality & 0xff);
+ }
}
static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
h264->outputPictureTimingSEI = 1;
}
- if (cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_QUALITY ||
- cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP ||
- cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_VBR) {
+ if (cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ ||
+ cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_HQ ||
+ cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_VBR_HQ) {
h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
h264->fmoMode = NV_ENC_H264_FMO_DISABLE;
}
NV_ENC_PRESET_CONFIG preset_config = { 0 };
NVENCSTATUS nv_status = NV_ENC_SUCCESS;
AVCPBProperties *cpb_props;
+ CUresult cu_res;
+ CUcontext dummy;
int res = 0;
int dw, dh;
ctx->init_encode_params.enableEncodeAsync = 0;
ctx->init_encode_params.enablePTD = 1;
+ if (ctx->weighted_pred == 1)
+ ctx->init_encode_params.enableWeightedPrediction = 1;
+
if (ctx->bluray_compat) {
ctx->aud = 1;
avctx->refs = FFMIN(FFMAX(avctx->refs, 0), 6);
if (res)
return res;
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
+
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
if (nv_status != NV_ENC_SUCCESS) {
return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
}
}
allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
- allocSurf.width = (avctx->width + 31) & ~31;
- allocSurf.height = (avctx->height + 31) & ~31;
- allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
+ allocSurf.width = avctx->width;
+ allocSurf.height = avctx->height;
allocSurf.bufferFmt = ctx->surfaces[idx].format;
nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
ctx->surfaces[idx].height = allocSurf.height;
}
- /* 1MB is large enough to hold most output frames.
- * NVENC increases this automaticaly if it is not enough. */
- allocOut.size = 1024 * 1024;
-
- allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
-
nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
if (nv_status != NV_ENC_SUCCESS) {
int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
+ NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+ CUresult cu_res;
+ CUcontext dummy;
int i, res;
ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces));
if (!ctx->output_surface_ready_queue)
return AVERROR(ENOMEM);
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
for (i = 0; i < ctx->nb_surfaces; i++) {
if ((res = nvenc_alloc_surface(avctx, i)) < 0)
+ {
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
return res;
+ }
+ }
+
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
}
return 0;
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+ CUresult cu_res;
+ CUcontext dummy;
int i;
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
/* the encoder has to be flushed before it can be closed */
if (ctx->nvencoder) {
NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER,
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
ctx->nvencoder = NULL;
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
if (ctx->cu_context_internal)
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
ctx->cu_context = ctx->cu_context_internal = NULL;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
- AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frame->hw_frames_ctx->data;
NV_ENC_REGISTER_RESOURCE reg;
int i, idx, ret;
pic_params.inputBuffer = inSurf->input_surface;
pic_params.bufferFmt = inSurf->format;
- pic_params.inputWidth = avctx->width;
- pic_params.inputHeight = avctx->height;
+ pic_params.inputWidth = inSurf->width;
+ pic_params.inputHeight = inSurf->height;
pic_params.inputPitch = inSurf->pitch;
pic_params.outputBitstream = inSurf->output_surface;
if (output_ready(avctx, !frame)) {
av_fifo_generic_read(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
res = process_output_surface(avctx, pkt, tmpoutsurf);
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
if (res)
return res;