From c0e284ccfedafb646c2af33bdd07985231f71916 Mon Sep 17 00:00:00 2001
From: Aniket Masule
Date: Tue, 24 Sep 2019 02:47:49 -0300
Subject: [PATCH] media: venus: Update clock scaling

The current clock scaling calculation is the same for vpu4 and for
earlier versions. For vpu4, update the calculation to take cycles per
macroblock and bitrate into account, which gives a more precise estimate
of the clock rate that is actually required.

When the summed request exceeds the highest rate in the frequency table,
warn with the requested rate before clamping it to the maximum, so that
the "needed" value in the log is meaningful.

Signed-off-by: Aniket Masule
Signed-off-by: Stanimir Varbanov
Signed-off-by: Mauro Carvalho Chehab
---
 drivers/media/platform/qcom/venus/helpers.c | 174 ++++++++++++++++++++++++----
 drivers/media/platform/qcom/venus/helpers.h |   2 +-
 drivers/media/platform/qcom/venus/vdec.c    |   4 +-
 3 files changed, 157 insertions(+), 23 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index 94fd071217c3..5ea5d90f8e5f 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -448,12 +448,36 @@ static int load_scale_bw(struct venus_core *core)
 	return icc_set_bw(core->video_path, total_avg, total_peak);
 }
 
-int venus_helper_load_scale_clocks(struct venus_core *core)
+/*
+ * Set @freq on core->clks[0], core0_clk and core1_clk in that order,
+ * returning the first clk_set_rate() error.
+ */
+static int set_clk_freq(struct venus_core *core, unsigned long freq)
 {
+	struct clk *clk = core->clks[0];
+	int ret;
+
+	ret = clk_set_rate(clk, freq);
+	if (ret)
+		return ret;
+
+	ret = clk_set_rate(core->core0_clk, freq);
+	if (ret)
+		return ret;
+
+	ret = clk_set_rate(core->core1_clk, freq);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int scale_clocks(struct venus_inst *inst)
+{
+	struct venus_core *core = inst->core;
 	const struct freq_tbl *table = core->res->freq_tbl;
 	unsigned int num_rows = core->res->freq_tbl_size;
 	unsigned long freq = table[0].freq;
-	struct clk *clk = core->clks[0];
 	struct device *dev = core->dev;
 	u32 mbs_per_sec;
 	unsigned int i;
@@ -479,28 +503,136 @@ int venus_helper_load_scale_clocks(struct venus_core *core)
 
 set_freq:
 
-	ret = clk_set_rate(clk, freq);
-	if (ret)
-		goto err;
+	ret = set_clk_freq(core, freq);
+	if (ret) {
+		dev_err(dev, "failed to set clock rate %lu (%d)\n",
+			freq, ret);
+		return ret;
+	}
 
-	ret = clk_set_rate(core->core0_clk, freq);
-	if (ret)
-		goto err;
+	ret = load_scale_bw(core);
+	if (ret) {
+		dev_err(dev, "failed to set bandwidth (%d)\n",
+			ret);
+		return ret;
+	}
 
-	ret = clk_set_rate(core->core1_clk, freq);
-	if (ret)
-		goto err;
+	return 0;
+}
+
+/*
+ * Per-instance clock estimate: max of the VPP and VSP requirements.
+ * Each is (load_per_instance() / fps) times the per-codec factor from
+ * codec_freq_data; VPP gets 5% overhead, VSP adds the bitstream rate
+ * (encoder bitrate, or fps * @filled_len * 8 for decode) scaled by 10/7.
+ */
+static unsigned long calculate_inst_freq(struct venus_inst *inst,
+					 unsigned long filled_len)
+{
+	unsigned long vpp_freq = 0, vsp_freq = 0;
+	u64 fps = inst->fps;
+	u32 mbs_per_sec;
+
+	mbs_per_sec = load_per_instance(inst) / inst->fps;
+	vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+	/* 21 / 20 is overhead factor */
+	vpp_freq += vpp_freq / 20;
+	vsp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vsp_freq;
+
+	/* 10 / 7 is overhead factor */
+	if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+		vsp_freq += (inst->controls.enc.bitrate * 10) / 7;
+	else
+		vsp_freq += ((fps * filled_len * 8) * 10) / 7;
+
+	return max(vpp_freq, vsp_freq);
+}
+
+/*
+ * Recompute this instance's clock requirement from the largest payload
+ * among its queued source buffers, sum the requirements of all instances
+ * per core, and program the first freq_tbl rate, searching from the last
+ * row, that covers the busier core. A decoder with no payload queued
+ * leaves the clocks untouched.
+ */
+static int scale_clocks_v4(struct venus_inst *inst)
+{
+	struct venus_core *core = inst->core;
+	const struct freq_tbl *table = core->res->freq_tbl;
+	unsigned int num_rows = core->res->freq_tbl_size;
+	struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
+	struct device *dev = core->dev;
+	unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+	unsigned long filled_len = 0;
+	struct venus_buffer *buf, *n;
+	struct vb2_buffer *vb;
+	int i, ret;
+
+	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
+		vb = &buf->vb.vb2_buf;
+		filled_len = max(filled_len, vb2_get_plane_payload(vb, 0));
+	}
+
+	if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len)
+		return 0;
+
+	freq = calculate_inst_freq(inst, filled_len);
+	inst->clk_data.freq = freq;
+
+	mutex_lock(&core->lock);
+	list_for_each_entry(inst, &core->instances, list) {
+		if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+			freq_core1 += inst->clk_data.freq;
+		} else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+			freq_core2 += inst->clk_data.freq;
+		} else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+			freq_core1 += inst->clk_data.freq;
+			freq_core2 += inst->clk_data.freq;
+		}
+	}
+	mutex_unlock(&core->lock);
+
+	freq = max(freq_core1, freq_core2);
+
+	if (freq > table[0].freq) {
+		/* Report the requested rate before clamping it to the max. */
+		dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
+			 freq, table[0].freq);
+		freq = table[0].freq;
+		goto set_freq;
+	}
+
+	for (i = num_rows - 1 ; i >= 0; i--) {
+		if (freq <= table[i].freq) {
+			freq = table[i].freq;
+			break;
+		}
+	}
+
+set_freq:
+
+	ret = set_clk_freq(core, freq);
+	if (ret) {
+		dev_err(dev, "failed to set clock rate %lu (%d)\n",
+			freq, ret);
+		return ret;
+	}
 
 	ret = load_scale_bw(core);
-	if (ret)
-		goto err;
+	if (ret) {
+		dev_err(dev, "failed to set bandwidth (%d)\n",
+			ret);
+		return ret;
+	}
 
 	return 0;
+}
 
-err:
-	dev_err(dev, "failed to set clock rate %lu or bandwidth (%d)\n",
-		freq, ret);
-	return ret;
+int venus_helper_load_scale_clocks(struct venus_inst *inst)
+{
+	if (IS_V4(inst->core))
+		return scale_clocks_v4(inst);
+
+	return scale_clocks(inst);
 }
 EXPORT_SYMBOL_GPL(venus_helper_load_scale_clocks);
 
@@ -606,6 +738,8 @@ session_process_buf(struct venus_inst *inst, struct vb2_v4l2_buffer *vbuf)
 
 		if (inst->session_type == VIDC_SESSION_TYPE_DEC)
 			put_ts_metadata(inst, vbuf);
+
+		venus_helper_load_scale_clocks(inst);
 	} else if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
 		if (inst->session_type == VIDC_SESSION_TYPE_ENC)
 			fdata.buffer_type = HFI_BUFFER_OUTPUT;
@@ -874,6 +1008,7 @@ int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
 	const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
 	struct hfi_videocores_usage_type cu;
 
+	inst->clk_data.core_id = usage;
 	if (!IS_V4(inst->core))
 		return 0;
 
@@ -1235,7 +1370,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
 		venus_helper_free_dpb_bufs(inst);
 
-		venus_helper_load_scale_clocks(core);
+		venus_helper_load_scale_clocks(inst);
 		INIT_LIST_HEAD(&inst->registeredbufs);
 	}
 
@@ -1288,7 +1423,6 @@ EXPORT_SYMBOL_GPL(venus_helper_process_initial_out_bufs);
 
 int venus_helper_vb2_start_streaming(struct venus_inst *inst)
 {
-	struct venus_core *core = inst->core;
 	int ret;
 
 	ret = venus_helper_intbufs_alloc(inst);
@@ -1299,7 +1433,7 @@ int venus_helper_vb2_start_streaming(struct venus_inst *inst)
 	if (ret)
 		goto err_bufs_free;
 
-	venus_helper_load_scale_clocks(core);
+	venus_helper_load_scale_clocks(inst);
 
 	ret = hfi_session_load_res(inst);
 	if (ret)
diff --git a/drivers/media/platform/qcom/venus/helpers.h b/drivers/media/platform/qcom/venus/helpers.h
index 08abe0293c91..34dcd0c13f06 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -60,7 +60,7 @@ int venus_helper_intbufs_free(struct venus_inst *inst);
 int venus_helper_intbufs_realloc(struct venus_inst *inst);
 int venus_helper_queue_dpb_bufs(struct venus_inst *inst);
 int venus_helper_unregister_bufs(struct venus_inst *inst);
-int venus_helper_load_scale_clocks(struct venus_core *core);
+int venus_helper_load_scale_clocks(struct venus_inst *inst);
 int venus_helper_process_initial_cap_bufs(struct venus_inst *inst);
 int venus_helper_process_initial_out_bufs(struct venus_inst *inst);
 void venus_helper_get_ts_metadata(struct venus_inst *inst, u64 timestamp_us,
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index 18a861d7f31f..3bd6d5030598 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -868,7 +868,7 @@ reconfigure:
 	if (ret)
 		goto free_dpb_bufs;
 
-	venus_helper_load_scale_clocks(inst->core);
+	venus_helper_load_scale_clocks(inst);
 
 	ret = hfi_session_continue(inst);
 	if (ret)
@@ -1076,7 +1076,7 @@ static void vdec_session_release(struct venus_inst *inst)
 		hfi_session_abort(inst);
 
 	venus_helper_free_dpb_bufs(inst);
-	venus_helper_load_scale_clocks(core);
+	venus_helper_load_scale_clocks(inst);
 	INIT_LIST_HEAD(&inst->registeredbufs);
 
 	mutex_unlock(&inst->lock);
-- 
2.11.0