From 9dafa51cf6905fce99c205e8a21bbd7837310805 Mon Sep 17 00:00:00 2001 From: Pengfei Qu Date: Wed, 28 Dec 2016 10:36:40 +0800 Subject: [PATCH] ENC: add const data/table init function for AVC RC logic Signed-off-by: Pengfei Qu Reviewed-by: Sean V Kelley --- src/gen9_avc_encoder.c | 434 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 434 insertions(+) diff --git a/src/gen9_avc_encoder.c b/src/gen9_avc_encoder.c index e27d8eb..878345e 100755 --- a/src/gen9_avc_encoder.c +++ b/src/gen9_avc_encoder.c @@ -1496,3 +1496,437 @@ gen9_avc_kernel_scaling(VADriverContextP ctx, return VA_STATUS_SUCCESS; } +/* +frame/mb brc related function +*/ +static void +gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct gen9_mfx_avc_img_state *pstate) +{ + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state; + struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state; + + VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param; + VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param; + + memset(pstate, 0, sizeof(*pstate)); + + pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 -2; + pstate->dw0.sub_opcode_b = 0; + pstate->dw0.sub_opcode_a = 0; + pstate->dw0.command_opcode = 1; + pstate->dw0.pipeline = 2; + pstate->dw0.command_type = 3; + + pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ; + + pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1; + pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1; + + pstate->dw3.image_structure = 0;//frame is zero + pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; + pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag; + pstate->dw3.brc_domain_rate_control_enable = 0;//1,set for vdenc; + pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset; + pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset; + + pstate->dw4.field_picture_flag = 0; + pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag; + pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag; + pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag; + pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag; + pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag; + pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag; + pstate->dw4.mb_mv_format_flag = 1; + pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc; + pstate->dw4.mv_unpacked_flag = 1; + pstate->dw4.insert_test_flag = 0; + pstate->dw4.load_slice_pointer_flag = 0; + pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */ + pstate->dw4.minimum_frame_size = 0; + pstate->dw5.intra_mb_max_bit_flag = 1; + pstate->dw5.inter_mb_max_bit_flag = 1; + pstate->dw5.frame_size_over_flag = 1; + pstate->dw5.frame_size_under_flag = 1; + pstate->dw5.intra_mb_ipcm_flag = 1; + pstate->dw5.mb_rate_ctrl_flag = 0; /* Always 0 in VDEnc mode */ + pstate->dw5.non_first_pass_flag = 0; + pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0; + pstate->dw5.aq_chroma_disable = 1; + if(pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) + { + pstate->dw5.aq_enable = avc_state->tq_enable; + pstate->dw5.aq_rounding = avc_state->tq_rounding; + }else + { + pstate->dw5.aq_rounding = 0; + } + + pstate->dw6.intra_mb_max_size = 2700; + pstate->dw6.inter_mb_max_size = 4095; + + pstate->dw8.slice_delta_qp_max0 = 0; + pstate->dw8.slice_delta_qp_max1 = 0; + pstate->dw8.slice_delta_qp_max2 = 0; + pstate->dw8.slice_delta_qp_max3 = 0; + + pstate->dw9.slice_delta_qp_min0 = 0; + pstate->dw9.slice_delta_qp_min1 = 0; + pstate->dw9.slice_delta_qp_min2 = 0; + pstate->dw9.slice_delta_qp_min3 = 0; + + pstate->dw10.frame_bitrate_min = 0; + pstate->dw10.frame_bitrate_min_unit = 1; + pstate->dw10.frame_bitrate_min_unit_mode = 1; + pstate->dw10.frame_bitrate_max = (1 << 14) - 1; + pstate->dw10.frame_bitrate_max_unit = 1; + pstate->dw10.frame_bitrate_max_unit_mode = 1; + + pstate->dw11.frame_bitrate_min_delta = 0; + pstate->dw11.frame_bitrate_max_delta = 0; + + pstate->dw12.vad_error_logic = 1; + /* TODO: set paramters DW19/DW20 for slices */ +} + +void gen9_avc_set_image_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct i965_gpe_resource *gpe_resource) +{ + struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state; + char *pdata; + int i; + unsigned int * data; + struct gen9_mfx_avc_img_state cmd; + + pdata = i965_map_gpe_resource(gpe_resource); + + gen9_avc_init_mfx_avc_img_state(ctx,encode_state,encoder_context,&cmd); + for(i = 0; i < generic_state->num_pak_passes;i++) + { + + if(i == 0) + { + cmd.dw4.macroblock_stat_enable = 0; + cmd.dw5.non_first_pass_flag = 0; + }else + { + cmd.dw4.macroblock_stat_enable = 1; + cmd.dw5.non_first_pass_flag = 1; + cmd.dw5.intra_mb_ipcm_flag = 1; + + } + cmd.dw5.mb_rate_ctrl_flag = 0; + memcpy(pdata,&cmd,sizeof(struct gen9_mfx_avc_img_state)); + data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state)); + *data = MI_BATCH_BUFFER_END; + + pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE; + } + i965_unmap_gpe_resource(gpe_resource); + return; +} + +void gen9_avc_set_image_state_non_brc(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct i965_gpe_resource *gpe_resource) +{ + struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state; + char *pdata; + + unsigned int * data; + struct gen9_mfx_avc_img_state cmd; + + pdata = i965_map_gpe_resource(gpe_resource); + + gen9_avc_init_mfx_avc_img_state(ctx,encode_state,encoder_context,&cmd); + + if(generic_state->curr_pak_pass == 0) + { + cmd.dw4.macroblock_stat_enable = 0; + cmd.dw5.non_first_pass_flag = 0; + + } + else + { + cmd.dw4.macroblock_stat_enable = 1; + cmd.dw5.non_first_pass_flag = 0; + cmd.dw5.intra_mb_ipcm_flag = 1; + } + + cmd.dw5.mb_rate_ctrl_flag = 0; + memcpy(pdata,&cmd,sizeof(struct gen9_mfx_avc_img_state)); + data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state)); + *data = MI_BATCH_BUFFER_END; + + i965_unmap_gpe_resource(gpe_resource); + return; +} + +static void +gen9_avc_init_brc_const_data(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state; + struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state; + + struct i965_gpe_resource *gpe_resource = NULL; + unsigned char * data =NULL; + unsigned char * data_tmp = NULL; + unsigned int size = 0; + unsigned int table_idx = 0; + unsigned int block_based_skip_enable = avc_state->block_based_skip_enable; + int i = 0; + + struct object_surface *obj_surface; + VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param; + VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0]; + VASurfaceID surface_id; + unsigned int transform_8x8_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag; + + gpe_resource = &(avc_ctx->res_brc_const_data_buffer); + assert(gpe_resource); + + i965_zero_gpe_resource(gpe_resource); + + data = i965_map_gpe_resource(gpe_resource); + assert(data); + + table_idx = slice_type_kernel[generic_state->frame_type]; + + /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/ + size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb); + memcpy(data,gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb,size*sizeof(unsigned char)); + + data += size; + + /* skip threshold table*/ + size = 128; + switch(generic_state->frame_type) + { + case SLICE_TYPE_P: + memcpy(data,gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char)); + break; + case SLICE_TYPE_B: + memcpy(data,gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char)); + break; + default: + /*SLICE_TYPE_I,no change */ + break; + } + + if((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) + { + for(i = 0; i< 52 ; i++) + { + *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable,transform_8x8_mode_flag,avc_state->non_ftq_skip_threshold_lut[i]); + } + } + data += size; + + /*fill the qp for ref list*/ + size = 32 + 32 +32 +160; + memset(data,0xff,32); + memset(data+32+32,0xff,32); + switch(generic_state->frame_type) + { + case SLICE_TYPE_P: + { + for(i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) + { + surface_id = slice_param->RefPicList0[i].picture_id; + obj_surface = SURFACE(surface_id); + if (!obj_surface) + break; + *(data + i) = avc_state->list_ref_idx[0][i];//? + } + } + break; + case SLICE_TYPE_B: + { + data = data + 32 + 32; + for(i = 0 ; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) + { + surface_id = slice_param->RefPicList1[i].picture_id; + obj_surface = SURFACE(surface_id); + if (!obj_surface) + break; + *(data + i) = avc_state->list_ref_idx[1][i];//? + } + + data = data - 32 - 32; + + for(i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) + { + surface_id = slice_param->RefPicList0[i].picture_id; + obj_surface = SURFACE(surface_id); + if (!obj_surface) + break; + *(data + i) = avc_state->list_ref_idx[0][i];//? + } + } + break; + default: + /*SLICE_TYPE_I,no change */ + break; + } + data += size; + + /*mv cost and mode cost*/ + size = 1664; + memcpy(data,(unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0],size * sizeof(unsigned char)); + + if(avc_state->old_mode_cost_enable) + { data_tmp = data; + for(i = 0; i < 52 ; i++) + { + *(data_tmp +3) = (unsigned int)gen9_avc_old_intra_mode_cost[i]; + data_tmp += 16; + } + } + + if(avc_state->ftq_skip_threshold_lut_input_enable) + { + for(i = 0; i < 52 ; i++) + { + *(data + (i * 32) + 24) = + *(data + (i * 32) + 25) = + *(data + (i * 32) + 27) = + *(data + (i * 32) + 28) = + *(data + (i * 32) + 29) = + *(data + (i * 32) + 30) = + *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i]; + } + + } + data += size; + + /*ref cost*/ + size = 128; + memcpy(data,(unsigned char *)&gen9_avc_ref_cost[table_idx][0],size * sizeof(unsigned char)); + data += size; + + /*scaling factor*/ + size = 64; + if(avc_state->adaptive_intra_scaling_enable) + { + memcpy(data,(unsigned char *)&gen9_avc_adaptive_intra_scaling_factor,size * sizeof(unsigned char)); + }else + { + memcpy(data,(unsigned char *)&gen9_avc_intra_scaling_factor,size * sizeof(unsigned char)); + } + i965_unmap_gpe_resource(gpe_resource); +} + +static void +gen9_avc_init_brc_const_data_old(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state; + struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state; + + struct i965_gpe_resource *gpe_resource = NULL; + unsigned int * data =NULL; + unsigned int * data_tmp = NULL; + unsigned int size = 0; + unsigned int table_idx = 0; + VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param; + unsigned int block_based_skip_enable = avc_state->block_based_skip_enable; + unsigned int transform_8x8_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag; + int i = 0; + + gpe_resource = &(avc_ctx->res_brc_const_data_buffer); + assert(gpe_resource); + + i965_zero_gpe_resource(gpe_resource); + + data = i965_map_gpe_resource(gpe_resource); + assert(data); + + table_idx = slice_type_kernel[generic_state->frame_type]; + + /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/ + size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb); + memcpy(data,gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb,size*sizeof(unsigned char)); + + data += size; + + /* skip threshold table*/ + size = 128; + switch(generic_state->frame_type) + { + case SLICE_TYPE_P: + memcpy(data,gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char)); + break; + case SLICE_TYPE_B: + memcpy(data,gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char)); + break; + default: + /*SLICE_TYPE_I,no change */ + break; + } + + if((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) + { + for(i = 0; i< 52 ; i++) + { + *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable,transform_8x8_mode_flag,avc_state->non_ftq_skip_threshold_lut[i]); + } + } + data += size; + + /*fill the qp for ref list*/ + size = 128; + data += size; + size = 128; + data += size; + + /*mv cost and mode cost*/ + size = 1664; + memcpy(data,(unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0],size * sizeof(unsigned char)); + + if(avc_state->old_mode_cost_enable) + { data_tmp = data; + for(i = 0; i < 52 ; i++) + { + *(data_tmp +3) = (unsigned int)gen9_avc_old_intra_mode_cost[i]; + data_tmp += 16; + } + } + + if(avc_state->ftq_skip_threshold_lut_input_enable) + { + for(i = 0; i < 52 ; i++) + { + *(data + (i * 32) + 24) = + *(data + (i * 32) + 25) = + *(data + (i * 32) + 27) = + *(data + (i * 32) + 28) = + *(data + (i * 32) + 29) = + *(data + (i * 32) + 30) = + *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i]; + } + + } + data += size; + + /*ref cost*/ + size = 128; + memcpy(data,(unsigned char *)&gen9_avc_ref_cost[table_idx][0],size * sizeof(unsigned char)); + + i965_unmap_gpe_resource(gpe_resource); +} -- 2.11.0