From: Wang Tiatian Date: Fri, 8 Sep 2017 15:13:26 +0000 (-0400) Subject: add init kernel, set curbe, send surface for gen8 avc encoder X-Git-Tag: android-x86-8.1-r1~209 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=718aff6f91efedf34e8422e8d3dd45fe0bddb8ed;p=android-x86%2Fhardware-intel-common-vaapi.git add init kernel, set curbe, send surface for gen8 avc encoder Signed-off-by: Wang Tiatian --- diff --git a/src/gen9_avc_encoder.c b/src/gen9_avc_encoder.c index c465a35..5958110 100644 --- a/src/gen9_avc_encoder.c +++ b/src/gen9_avc_encoder.c @@ -260,6 +260,138 @@ static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_in } }; +static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = { + //unsigned int 0 + { + 0 + }, + + //unsigned int 1 + { + 0 + }, + + //unsigned int 2 + { + 0 + }, + + //unsigned int 3 + { + + 10, + 50 + + }, + + //unsigned int 4 + { + + 100, + 150 + + }, + + //unsigned int 5 + { + 0, 0, 0, 0 + }, + + //unsigned int 6 + { + 0, 0, 0, 0 + }, + + //unsigned int 7 + { + 0 + }, + + //unsigned int 8 + { + + 1, + 1, + 3, + 2 + + }, + + //unsigned int 9 + { + + 1, + 40, + 5, + 5 + + }, + + //unsigned int 10 + { + + 3, + 1, + 7, + 18 + + }, + + //unsigned int 11 + { + + 25, + 37, + 40, + 75 + + }, + + //unsigned int 12 + { + + 97, + 103, + 125, + 160 + + }, + + //unsigned int 13 + { + + -3, + -2, + -1, + 0 + + }, + + //unsigned int 14 + { + + 1, + 2, + 3, + 0xff + + }, + + //unsigned int 15 + { + 0, 0 + }, + + //unsigned int 16 + { + 0, 0 + }, + + //unsigned int 17 + { + 0, 0 + }, +}; static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = { // unsigned int 0 { @@ -997,6 +1129,7 @@ gen9_avc_allocate_resources(VADriverContextP ctx, "brc const data buffer"); if (!allocate_flag) goto failed_allocation; + i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer); if (generic_state->brc_distortion_buffer_supported) { width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64); @@ -1104,6 +1237,7 @@ gen9_avc_allocate_resources(VADriverContextP ctx, "sfd output buffer"); if (!allocate_flag) goto failed_allocation; + i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer); i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer); size = ALIGN(52, 64); @@ -1526,6 +1660,7 @@ gen9_avc_send_surface_scaling(VADriverContextP ctx, void *param) { struct scaling_param *surface_param = (struct scaling_param *)param; + struct i965_driver_data *i965 = i965_driver_data(ctx); unsigned int surface_format; unsigned int res_size; @@ -1547,24 +1682,44 @@ gen9_avc_send_surface_scaling(VADriverContextP ctx, GEN9_AVC_SCALING_FRAME_DST_Y_INDEX); /*add buffer mv_proc_stat, here need change*/ - if (surface_param->mbv_proc_stat_enabled) { - res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int); + if (IS_GEN8(i965->intel.device_info)) { + if (surface_param->mbv_proc_stat_enabled) { + res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int); - i965_add_buffer_gpe_surface(ctx, - gpe_context, - surface_param->pres_mbv_proc_stat_buffer, - 0, - res_size / 4, - 0, - GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX); - } else if (surface_param->enable_mb_flatness_check) { - i965_add_buffer_2d_gpe_surface(ctx, gpe_context, - surface_param->pres_flatness_check_surface, - 1, - I965_SURFACEFORMAT_R8_UNORM, - GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX); - } + i965_add_buffer_gpe_surface(ctx, + gpe_context, + surface_param->pres_mbv_proc_stat_buffer, + 0, + res_size / 4, + 0, + GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM); + } + if (surface_param->enable_mb_flatness_check) { + i965_add_buffer_2d_gpe_surface(ctx, gpe_context, + surface_param->pres_flatness_check_surface, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN8_SCALING_FRAME_FLATNESS_DST_CM); + } + } else { + if (surface_param->mbv_proc_stat_enabled) { + res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int); + i965_add_buffer_gpe_surface(ctx, + gpe_context, + surface_param->pres_mbv_proc_stat_buffer, + 0, + res_size / 4, + 0, + GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX); + } else if (surface_param->enable_mb_flatness_check) { + i965_add_buffer_2d_gpe_surface(ctx, gpe_context, + surface_param->pres_flatness_check_surface, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX); + } + } return; } @@ -1827,6 +1982,95 @@ gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx, /* set paramters DW19/DW20 for slices */ } +static void +gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct gen8_mfx_avc_img_state *pstate) +{ + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state; + struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state; + + VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param; + VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param; + + memset(pstate, 0, sizeof(*pstate)); + + pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2; + pstate->dw0.command_sub_opcode_b = 0; + pstate->dw0.command_sub_opcode_a = 0; + pstate->dw0.command_opcode = 1; + pstate->dw0.command_pipeline = 2; + pstate->dw0.command_type = 3; + + pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ; + + pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1; + pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1; + + pstate->dw3.image_structure = 0;//frame is zero + pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; + pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag; + pstate->dw3.inter_mb_conf_flag = 0; + pstate->dw3.intra_mb_conf_flag = 0; + pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset; + pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset; + + pstate->dw4.field_picture_flag = 0; + pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag; + pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag; + pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag; + pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag; + pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag; + pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag; + pstate->dw4.mb_mv_format_flag = 1; + pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc; + pstate->dw4.mv_unpacked_flag = 1; + pstate->dw4.insert_test_flag = 0; + pstate->dw4.load_slice_pointer_flag = 0; + pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */ + pstate->dw4.minimum_frame_size = 0; + pstate->dw5.intra_mb_max_bit_flag = 1; + pstate->dw5.inter_mb_max_bit_flag = 1; + pstate->dw5.frame_size_over_flag = 1; + pstate->dw5.frame_size_under_flag = 1; + pstate->dw5.intra_mb_ipcm_flag = 1; + pstate->dw5.mb_rate_ctrl_flag = 0; + pstate->dw5.non_first_pass_flag = 0; + pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0; + pstate->dw5.aq_chroma_disable = 1; + if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) { + pstate->dw5.aq_enable = avc_state->tq_enable; + pstate->dw5.aq_rounding = avc_state->tq_rounding; + } else { + pstate->dw5.aq_rounding = 0; + } + + pstate->dw6.intra_mb_max_size = 2700; + pstate->dw6.inter_mb_max_size = 4095; + + pstate->dw8.slice_delta_qp_max0 = 0; + pstate->dw8.slice_delta_qp_max1 = 0; + pstate->dw8.slice_delta_qp_max2 = 0; + pstate->dw8.slice_delta_qp_max3 = 0; + + pstate->dw9.slice_delta_qp_min0 = 0; + pstate->dw9.slice_delta_qp_min1 = 0; + pstate->dw9.slice_delta_qp_min2 = 0; + pstate->dw9.slice_delta_qp_min3 = 0; + + pstate->dw10.frame_bitrate_min = 0; + pstate->dw10.frame_bitrate_min_unit = 1; + pstate->dw10.frame_bitrate_min_unit_mode = 1; + pstate->dw10.frame_bitrate_max = (1 << 14) - 1; + pstate->dw10.frame_bitrate_max_unit = 1; + pstate->dw10.frame_bitrate_max_unit_mode = 1; + + pstate->dw11.frame_bitrate_min_delta = 0; + pstate->dw11.frame_bitrate_max_delta = 0; + /* set paramters DW19/DW20 for slices */ +} void gen9_avc_set_image_state(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context, @@ -1867,6 +2111,47 @@ void gen9_avc_set_image_state(VADriverContextP ctx, return; } +void gen8_avc_set_image_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct i965_gpe_resource *gpe_resource) +{ + struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state; + char *pdata; + int i; + unsigned int * data; + struct gen8_mfx_avc_img_state cmd; + + pdata = i965_map_gpe_resource(gpe_resource); + + if (!pdata) + return; + + gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd); + for (i = 0; i < generic_state->num_pak_passes; i++) { + + if (i == 0) { + cmd.dw4.macroblock_stat_enable = 0; + cmd.dw5.non_first_pass_flag = 0; + } else { + cmd.dw4.macroblock_stat_enable = 1; + cmd.dw5.non_first_pass_flag = 1; + cmd.dw5.intra_mb_ipcm_flag = 1; + cmd.dw3.inter_mb_conf_flag = 1; + cmd.dw3.intra_mb_conf_flag = 1; + } + cmd.dw5.mb_rate_ctrl_flag = 0; + memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state)); + data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state)); + *data = MI_BATCH_BUFFER_END; + + pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE; + } + i965_unmap_gpe_resource(gpe_resource); + return; +} + void gen9_avc_set_image_state_non_brc(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context, @@ -2576,7 +2861,8 @@ gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx, unsigned char is_g95 = 0; if (IS_SKL(i965->intel.device_info) || - IS_BXT(i965->intel.device_info)) + IS_BXT(i965->intel.device_info) || + IS_GEN8(i965->intel.device_info)) is_g95 = 0; else if (IS_KBL(i965->intel.device_info) || IS_GLK(i965->intel.device_info)) @@ -2662,14 +2948,22 @@ gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx, (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX)); /* MB statistical data surface*/ - i965_add_buffer_gpe_surface(ctx, - gpe_context, - &avc_ctx->res_mb_status_buffer, - 0, - avc_ctx->res_mb_status_buffer.size, - 0, - (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX)); - + if (!IS_GEN8(i965->intel.device_info)) { + i965_add_buffer_gpe_surface(ctx, + gpe_context, + &avc_ctx->res_mb_status_buffer, + 0, + avc_ctx->res_mb_status_buffer.size, + 0, + (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX)); + } else { + i965_add_buffer_2d_gpe_surface(ctx, + gpe_context, + &avc_ctx->res_mbbrc_mb_qp_data_surface, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX); + } return; } @@ -2774,7 +3068,11 @@ gen9_avc_kernel_brc_frame_update(VADriverContextP ctx, gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context); } /* image state construct*/ - gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer)); + if (IS_GEN8(i965->intel.device_info)) { + gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer)); + } else { + gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer)); + } /* set surface frame mbenc*/ generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param); @@ -3281,7 +3579,6 @@ gen9_avc_set_curbe_mbenc(VADriverContextP ctx, { memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int)); if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) { - //cmd.g9->dw8 = gen9_avc_old_intra_mode_cost[qp]; } else if (avc_state->skip_bias_adjustment_enable) { /* Load different MvCost for P picture when SkipBiasAdjustment is enabled // No need to check for P picture as the flag is only enabled for P picture */ @@ -4199,19 +4496,35 @@ gen9_avc_send_surface_mbenc(VADriverContextP ctx, if (generic_state->hme_enabled) { /*memv input 4x*/ - gpe_resource = &(avc_ctx->s4x_memv_data_buffer); - i965_add_buffer_2d_gpe_surface(ctx, gpe_context, - gpe_resource, - 1, - I965_SURFACEFORMAT_R8_UNORM, - GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX); - /* memv distortion input*/ - gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer); - i965_add_buffer_2d_gpe_surface(ctx, gpe_context, - gpe_resource, - 1, - I965_SURFACEFORMAT_R8_UNORM, - GEN9_AVC_MBENC_4XME_DISTORTION_INDEX); + if (!IS_GEN8(i965->intel.device_info)) { + gpe_resource = &(avc_ctx->s4x_memv_data_buffer); + i965_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX); + /* memv distortion input*/ + gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer); + i965_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_MBENC_4XME_DISTORTION_INDEX); + } else if (generic_state->frame_type != SLICE_TYPE_I) { + gpe_resource = &(avc_ctx->s4x_memv_data_buffer); + i965_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX); + /* memv distortion input*/ + gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer); + i965_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_MBENC_4XME_DISTORTION_INDEX); + } } /*mbbrc const data_buffer*/ @@ -4348,21 +4661,28 @@ gen9_avc_send_surface_mbenc(VADriverContextP ctx, GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX); } + if (!IS_GEN8(i965->intel.device_info)) { + if (param->mb_vproc_stats_enable) { + /*mb status buffer input*/ + size = frame_mb_size * 16 * 4; + gpe_resource = &(avc_ctx->res_mb_status_buffer); + i965_add_buffer_gpe_surface(ctx, + gpe_context, + gpe_resource, + 0, + size / 4, + 0, + GEN9_AVC_MBENC_MB_STATS_INDEX); - if (param->mb_vproc_stats_enable) { - /*mb status buffer input*/ - size = frame_mb_size * 16 * 4; - gpe_resource = &(avc_ctx->res_mb_status_buffer); - i965_add_buffer_gpe_surface(ctx, - gpe_context, - gpe_resource, - 0, - size / 4, - 0, - GEN9_AVC_MBENC_MB_STATS_INDEX); - + } else if (avc_state->flatness_check_enable) { + gpe_resource = &(avc_ctx->res_flatness_check_surface); + i965_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_MBENC_MB_STATS_INDEX); + } } else if (avc_state->flatness_check_enable) { - gpe_resource = &(avc_ctx->res_flatness_check_surface); i965_add_buffer_2d_gpe_surface(ctx, gpe_context, gpe_resource, @@ -4420,25 +4740,38 @@ gen9_avc_send_surface_mbenc(VADriverContextP ctx, I965_SURFACEFORMAT_R8_UNORM, (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX)); } + if (IS_GEN8(i965->intel.device_info)) { + if (avc_state->sfd_enable) { + size = 128 / sizeof(unsigned long); + gpe_resource = &(avc_ctx->res_sfd_output_buffer); + i965_add_buffer_gpe_surface(ctx, + gpe_context, + gpe_resource, + 0, + size / 4, + 0, + GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM); - if (avc_state->sfd_enable && generic_state->hme_enabled) { - if (generic_state->frame_type == SLICE_TYPE_P) { - gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer); - - } else if (generic_state->frame_type == SLICE_TYPE_B) { - gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer); } + } else { + if (avc_state->sfd_enable && generic_state->hme_enabled) { + if (generic_state->frame_type == SLICE_TYPE_P) { + gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer); + } else if (generic_state->frame_type == SLICE_TYPE_B) { + gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer); + } + if (generic_state->frame_type != SLICE_TYPE_I) { + i965_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX)); - if (generic_state->frame_type != SLICE_TYPE_I) { - i965_add_buffer_2d_gpe_surface(ctx, gpe_context, - gpe_resource, - 1, - I965_SURFACEFORMAT_R8_UNORM, - (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX)); + + } } } } - return; } @@ -5470,168 +5803,794 @@ gen9_avc_kernel_wp(VADriverContextP ctx, /*set curbe*/ generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, ¶m); - /*send surface*/ - generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, ¶m); + /*send surface*/ + generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, ¶m); + + gpe->setup_interface_data(ctx, gpe_context); + + memset(&kernel_walker_param, 0, sizeof(kernel_walker_param)); + /* the scaling is based on 8x8 blk level */ + kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs; + kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs; + kernel_walker_param.no_dependency = 1; + + i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param); + + gen9_avc_run_kernel_media_object_walker(ctx, encoder_context, + gpe_context, + media_function, + &media_object_walker_param); + + return VA_STATUS_SUCCESS; +} + + +/* +sfd related function +*/ +static void +gen9_avc_set_curbe_sfd(VADriverContextP ctx, + struct encode_state *encode_state, + struct i965_gpe_context *gpe_context, + struct intel_encoder_context *encoder_context, + void * param) +{ + gen9_avc_sfd_curbe_data *cmd; + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state; + struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state; + VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0]; + + cmd = i965_gpe_context_map_curbe(gpe_context); + + if (!cmd) + return; + memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data)); + + cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ; + cmd->dw0.enable_adaptive_mv_stream_in = 0 ; + cmd->dw0.stream_in_type = 7 ; + cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type] ; + cmd->dw0.brc_mode_enable = generic_state->brc_enabled ; + cmd->dw0.vdenc_mode_disable = 1 ; + + cmd->dw1.hme_stream_in_ref_cost = 5 ; + cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ; + cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ; + + cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ; + cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ; + + cmd->dw3.large_mv_threshold = 128 ; + cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ; + cmd->dw5.zmv_threshold = 4 ; + cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60; + cmd->dw7.min_dist_threshold = 10 ; + + if (generic_state->frame_type == SLICE_TYPE_P) { + memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char)); + + } else if (generic_state->frame_type == SLICE_TYPE_B) { + memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char)); + } + + cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ; + cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ; + cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ; + cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ; + cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ; + cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ; + cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ; + + i965_gpe_context_unmap_curbe(gpe_context); + +} + +static void +gen9_avc_send_surface_sfd(VADriverContextP ctx, + struct encode_state *encode_state, + struct i965_gpe_context *gpe_context, + struct intel_encoder_context *encoder_context, + void * param) +{ + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx; + struct i965_gpe_resource *gpe_resource; + int size = 0; + + /*HME mv data surface memv output 4x*/ + gpe_resource = &avc_ctx->s4x_memv_data_buffer; + i965_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX); + + /* memv distortion */ + gpe_resource = &avc_ctx->s4x_memv_distortion_buffer; + i965_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX); + /*buffer output*/ + size = 32 * 4 * 4; + gpe_resource = &avc_ctx->res_sfd_output_buffer; + i965_add_buffer_gpe_surface(ctx, + gpe_context, + gpe_resource, + 0, + size / 4, + 0, + GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX); + +} + +static VAStatus +gen9_avc_kernel_sfd(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct i965_gpe_table *gpe = &i965->gpe_table; + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx; + struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx; + + struct i965_gpe_context *gpe_context; + struct gpe_media_object_parameter media_object_param; + struct gpe_media_object_inline_data media_object_inline_data; + int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION; + gpe_context = &(avc_ctx->context_sfd.gpe_contexts); + + gpe->context_init(ctx, gpe_context); + gpe->reset_binding_table(ctx, gpe_context); + + /*set curbe*/ + generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL); + + /*send surface*/ + generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL); + + gpe->setup_interface_data(ctx, gpe_context); + + memset(&media_object_param, 0, sizeof(media_object_param)); + memset(&media_object_inline_data, 0, sizeof(media_object_inline_data)); + media_object_param.pinline_data = &media_object_inline_data; + media_object_param.inline_size = sizeof(media_object_inline_data); + + gen9_avc_run_kernel_media_object(ctx, encoder_context, + gpe_context, + media_function, + &media_object_param); + + return VA_STATUS_SUCCESS; +} + +static void +gen8_avc_set_curbe_mbenc(VADriverContextP ctx, + struct encode_state *encode_state, + struct i965_gpe_context *gpe_context, + struct intel_encoder_context *encoder_context, + void * param) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + gen8_avc_mbenc_curbe_data *cmd; + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state; + struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state; + + VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0]; + VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param; + VASurfaceID surface_id; + struct object_surface *obj_surface; + + struct mbenc_param * curbe_param = (struct mbenc_param *)param ; + unsigned char qp = 0; + unsigned char me_method = 0; + unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use; + unsigned int table_idx = 0; + unsigned int curbe_size = 0; + + unsigned int preset = generic_state->preset; + if (IS_GEN8(i965->intel.device_info)) { + cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context); + if (!cmd) + return; + curbe_size = sizeof(gen8_avc_mbenc_curbe_data); + memset(cmd, 0, curbe_size); + + if (mbenc_i_frame_dist_in_use) { + memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size); + } else { + switch (generic_state->frame_type) { + case SLICE_TYPE_I: + memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size); + break; + case SLICE_TYPE_P: + memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size); + break; + case SLICE_TYPE_B: + memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size); + break; + default: + assert(0); + } + } + } else { + assert(0); + + return; + } + + me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset]; + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + + cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset]; + cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset]; + cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable; + cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable; + + cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset]; + cmd->dw38.max_len_sp = 0; + + cmd->dw3.src_access = 0; + cmd->dw3.ref_access = 0; + + if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) { + //disable ftq_override by now. + if (avc_state->ftq_override) { + cmd->dw3.ftq_enable = avc_state->ftq_enable; + + } else { + if (generic_state->frame_type == SLICE_TYPE_P) { + cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01; + + } else { + cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01; + } + } + } else { + cmd->dw3.ftq_enable = 0; + } + + if (avc_state->disable_sub_mb_partion) + cmd->dw3.sub_mb_part_mask = 0x7; + + if (mbenc_i_frame_dist_in_use) { + cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb; + cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1; + cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4; + cmd->dw6.batch_buffer_end = 0; + cmd->dw31.intra_compute_type = 1; + } else { + cmd->dw2.pitch_width = generic_state->frame_width_in_mbs; + cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1; + cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height; + + { + memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int)); + if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) { + } else if (avc_state->skip_bias_adjustment_enable) { + /* Load different MvCost for P picture when SkipBiasAdjustment is enabled + // No need to check for P picture as the flag is only enabled for P picture */ + cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp]; + } + } + table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0; + memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int)); + } + cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled; + cmd->dw4.field_parity_flag = 0;//bottom field + cmd->dw4.enable_cur_fld_idr = 0;//field realted + cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag; + cmd->dw4.hme_enable = generic_state->hme_enabled; + cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type]; + cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0); + + cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02; + cmd->dw7.src_field_polarity = 0;//field related + + /*ftq_skip_threshold_lut set,dw14 /15*/ + + /*r5 disable NonFTQSkipThresholdLUT*/ + if (generic_state->frame_type == SLICE_TYPE_P) { + cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp]; + } else if (generic_state->frame_type == SLICE_TYPE_B) { + cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp]; + } + + cmd->dw13.qp_prime_y = qp; + cmd->dw13.qp_prime_cb = qp; + cmd->dw13.qp_prime_cr = qp; + cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable + + if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) { + switch (gen9_avc_multi_pred[preset]) { + case 0: + cmd->dw32.mult_pred_l0_disable = 128; + cmd->dw32.mult_pred_l1_disable = 128; + break; + case 1: + cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128; + cmd->dw32.mult_pred_l1_disable = 128; + break; + case 2: + cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128; + cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128; + break; + case 3: + cmd->dw32.mult_pred_l0_disable = 1; + cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128; + break; + } + + } else { + cmd->dw32.mult_pred_l0_disable = 128; + cmd->dw32.mult_pred_l1_disable = 128; + } + + if (generic_state->frame_type == SLICE_TYPE_B) { + cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only + cmd->dw34.list1_ref_id0_frm_field_parity = 0; + cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag; + } + + cmd->dw34.b_original_bff = 0; //frame only + cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable; + cmd->dw34.roi_enable_flag = curbe_param->roi_enabled; + cmd->dw34.mad_enable_falg = avc_state->mad_enable; + cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled; + cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice; + cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable; + + if (cmd->dw34.force_non_skip_check) { + cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable; + } + + cmd->dw36.check_all_fractional_enable = avc_state->caf_enable; + cmd->dw38.ref_threshold = 400; + cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset]; + cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2; + + if (mbenc_i_frame_dist_in_use) { + cmd->dw13.qp_prime_y = 0; + cmd->dw13.qp_prime_cb = 0; + cmd->dw13.qp_prime_cr = 0; + cmd->dw33.intra_16x16_nondc_penalty = 0; + cmd->dw33.intra_8x8_nondc_penalty = 0; + cmd->dw33.intra_4x4_nondc_penalty = 0; + } + if (cmd->dw4.use_actual_ref_qp_value) { + cmd->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0); + cmd->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1); + cmd->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2); + cmd->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3); + cmd->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4); + cmd->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5); + cmd->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6); + cmd->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7); + cmd->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0); + cmd->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1); + } + + table_idx = slice_type_kernel[generic_state->frame_type]; + cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp]; + if (generic_state->frame_type == SLICE_TYPE_I) { + cmd->dw0.skip_mode_enable = 0; + cmd->dw37.skip_mode_enable = 0; + cmd->dw36.hme_combine_overlap = 0; + cmd->dw47.intra_cost_sf = 16; + cmd->dw34.enable_direct_bias_adjustment = 0; + cmd->dw34.enable_global_motion_bias_adjustment = 0; + + } else if (generic_state->frame_type == SLICE_TYPE_P) { + cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2; + cmd->dw3.bme_disable_fbr = 1; + cmd->dw5.ref_width = gen9_avc_search_x[preset]; + cmd->dw5.ref_height = gen9_avc_search_y[preset]; + cmd->dw7.non_skip_zmv_added = 1; + cmd->dw7.non_skip_mode_added = 1; + cmd->dw7.skip_center_mask = 1; + cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp]; + cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys + cmd->dw36.hme_combine_overlap = 1; + cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0; + cmd->dw39.ref_width = gen9_avc_search_x[preset]; + cmd->dw39.ref_height = gen9_avc_search_y[preset]; + cmd->dw34.enable_direct_bias_adjustment = 0; + cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable; + if (avc_state->global_motion_bias_adjustment_enable) + cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor; + } else { + cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2; + cmd->dw1.bi_weight = avc_state->bi_weight; + cmd->dw3.search_ctrl = 7; + cmd->dw3.skip_type = 1; + cmd->dw5.ref_width = gen9_avc_b_search_x[preset]; + cmd->dw5.ref_height = gen9_avc_b_search_y[preset]; + cmd->dw7.skip_center_mask = 0xff; + cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp]; + cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only + cmd->dw36.hme_combine_overlap = 1; + surface_id = slice_param->RefPicList1[0].picture_id; + obj_surface = SURFACE(surface_id); + if (!obj_surface) { + WARN_ONCE("Invalid backward reference frame\n"); + return; + } + cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE); + cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0; + cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0; + cmd->dw39.ref_width = gen9_avc_b_search_x[preset]; + cmd->dw39.ref_height = gen9_avc_b_search_y[preset]; + cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0]; + cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1]; + cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2]; + cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3]; + cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4]; + cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5]; + cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6]; + cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7]; + cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable; + if (cmd->dw34.enable_direct_bias_adjustment) { + cmd->dw7.non_skip_zmv_added = 1; + cmd->dw7.non_skip_mode_added = 1; + } + + cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable; + if (avc_state->global_motion_bias_adjustment_enable) + cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor; + } + avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable; + + if (avc_state->rolling_intra_refresh_enable) { + /*by now disable it*/ + if (generic_state->brc_enabled) { + cmd->dw4.enable_intra_refresh = false; + cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable; + cmd->dw48.widi_intra_refresh_mbx = 0; + cmd->dw58.widi_intra_refresh_mby = 0; + } else { + cmd->dw4.enable_intra_refresh = true; + cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable; + } + cmd->dw32.mult_pred_l0_disable = 128; + /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks + across one P frame to another P frame, as needed by the RollingI algo */ + cmd->dw48.widi_intra_refresh_mbx = 0; + cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0; + cmd->dw48.widi_intra_refresh_qp_delta = 0; + + } else { + cmd->dw34.widi_intra_refresh_en = 0; + } + + /*roi set disable by now. 49-56*/ + if (curbe_param->roi_enabled) { + cmd->dw49.roi_1_x_left = generic_state->roi[0].left; + cmd->dw49.roi_1_y_top = generic_state->roi[0].top; + cmd->dw50.roi_1_x_right = generic_state->roi[0].right; + cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom; + + cmd->dw51.roi_2_x_left = generic_state->roi[1].left; + cmd->dw51.roi_2_y_top = generic_state->roi[1].top; + cmd->dw52.roi_2_x_right = generic_state->roi[1].right; + cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom; + + cmd->dw53.roi_3_x_left = generic_state->roi[2].left; + cmd->dw53.roi_3_y_top = generic_state->roi[2].top; + cmd->dw54.roi_3_x_right = generic_state->roi[2].right; + cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom; + + cmd->dw55.roi_4_x_left = generic_state->roi[3].left; + cmd->dw55.roi_4_y_top = generic_state->roi[3].top; + cmd->dw56.roi_4_x_right = generic_state->roi[3].right; + cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom; + + cmd->dw36.enable_cabac_work_around = 0; + + if (!generic_state->brc_enabled) { + char tmp = 0; + tmp = generic_state->roi[0].value; + CLIP(tmp, -qp, AVC_QP_MAX - qp); + cmd->dw57.roi_1_dqp_prime_y = tmp; + tmp = generic_state->roi[1].value; + CLIP(tmp, -qp, AVC_QP_MAX - qp); + cmd->dw57.roi_2_dqp_prime_y = tmp; + tmp = generic_state->roi[2].value; + CLIP(tmp, -qp, AVC_QP_MAX - qp); + cmd->dw57.roi_3_dqp_prime_y = tmp; + tmp = generic_state->roi[3].value; + CLIP(tmp, -qp, AVC_QP_MAX - qp); + cmd->dw57.roi_4_dqp_prime_y = tmp; + } else { + cmd->dw34.roi_enable_flag = 0; + } + } + + cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM; + cmd->dw66.mv_data_surf_index = GEN8_AVC_MBENC_IND_MV_DATA_CM; + cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM; + cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM; + cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM; + cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM; + cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM; + cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM; + cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM; + cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM; + cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM; + cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM; + cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM; + cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM; + cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM; + cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM; + cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM; + cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM; + cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM; + cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM; + cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM; + cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM; + + i965_gpe_context_unmap_curbe(gpe_context); + + return; +} + +static void +gen8_avc_set_curbe_scaling4x(VADriverContextP ctx, + struct encode_state *encode_state, + struct i965_gpe_context *gpe_context, + struct intel_encoder_context *encoder_context, + void *param) +{ + gen8_avc_scaling4x_curbe_data *curbe_cmd; + struct scaling_param *surface_param = (struct scaling_param *)param; + + curbe_cmd = i965_gpe_context_map_curbe(gpe_context); + + if (!curbe_cmd) + return; + + memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data)); + + curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width; + curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height; + + curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM; + curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM; + + curbe_cmd->dw5.flatness_threshold = 0; + if (surface_param->enable_mb_flatness_check) { + curbe_cmd->dw5.flatness_threshold = 128; + curbe_cmd->dw8.flatness_output_bti_top_field = 4; + } + + curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check; + curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output; + curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output; + + if (curbe_cmd->dw6.enable_mb_variance_output || + curbe_cmd->dw6.enable_mb_pixel_average_output) { + curbe_cmd->dw10.mbv_proc_states_bti_top_field = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM; + curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM; + } + + i965_gpe_context_unmap_curbe(gpe_context); + return; +} + +static void +gen8_avc_set_curbe_me(VADriverContextP ctx, + struct encode_state *encode_state, + struct i965_gpe_context *gpe_context, + struct intel_encoder_context *encoder_context, + void * param) +{ + gen8_avc_me_curbe_data *curbe_cmd; + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state; + struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state; + + VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0]; + + struct me_param * curbe_param = (struct me_param *)param ; + unsigned char use_mv_from_prev_step = 0; + unsigned char write_distortions = 0; + unsigned char qp_prime_y = 0; + unsigned char me_method = gen9_avc_p_me_method[generic_state->preset]; + unsigned char seach_table_idx = 0; + unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0; + unsigned int downscaled_width_in_mb, downscaled_height_in_mb; + unsigned int scale_factor = 0; + + qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta; + switch (curbe_param->hme_type) { + case INTEL_ENC_HME_4x : { + use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0; + write_distortions = 1; + mv_shift_factor = 2; + scale_factor = 4; + prev_mv_read_pos_factor = 0; + break; + } + case INTEL_ENC_HME_16x : { + use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0; + write_distortions = 0; + mv_shift_factor = 2; + scale_factor = 16; + prev_mv_read_pos_factor = 1; + break; + } + case INTEL_ENC_HME_32x : { + use_mv_from_prev_step = 0; + write_distortions = 0; + mv_shift_factor = 1; + scale_factor = 32; + prev_mv_read_pos_factor = 0; + break; + } + default: + assert(0); + + } + curbe_cmd = i965_gpe_context_map_curbe(gpe_context); + + if (!curbe_cmd) + return; + + downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16; + downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16; + + memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data)); + + curbe_cmd->dw3.sub_pel_mode = 3; + if (avc_state->field_scaling_output_interleaved) { + /*frame set to zero,field specified*/ + curbe_cmd->dw3.src_access = 0; + curbe_cmd->dw3.ref_access = 0; + curbe_cmd->dw7.src_field_polarity = 0; + } + curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1; + curbe_cmd->dw4.picture_width = downscaled_width_in_mb; + curbe_cmd->dw5.qp_prime_y = qp_prime_y; + + curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step; + curbe_cmd->dw6.write_distortions = write_distortions; + curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset]; + curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only + + if (generic_state->frame_type == SLICE_TYPE_B) { + curbe_cmd->dw1.bi_weight = 32; + curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1; + me_method = gen9_avc_b_me_method[generic_state->preset]; + seach_table_idx = 1; + } - gpe->setup_interface_data(ctx, gpe_context); + if (generic_state->frame_type == SLICE_TYPE_P || + generic_state->frame_type == SLICE_TYPE_B) + curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1; - memset(&kernel_walker_param, 0, sizeof(kernel_walker_param)); - /* the scaling is based on 8x8 blk level */ - kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs; - kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs; - kernel_walker_param.no_dependency = 1; + curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor; + curbe_cmd->dw15.mv_shift_factor = mv_shift_factor; - i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param); + memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int)); - gen9_avc_run_kernel_media_object_walker(ctx, encoder_context, - gpe_context, - media_function, - &media_object_walker_param); + curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM; + curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ; + curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM; + curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM; + curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM; + curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM; + curbe_cmd->dw38.reserved = 0; - return VA_STATUS_SUCCESS; + i965_gpe_context_unmap_curbe(gpe_context); + return; } - -/* -sfd related function -*/ static void -gen9_avc_set_curbe_sfd(VADriverContextP ctx, - struct encode_state *encode_state, - struct i965_gpe_context *gpe_context, - struct intel_encoder_context *encoder_context, - void * param) +gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx, + struct encode_state *encode_state, + struct i965_gpe_context *gpe_context, + struct intel_encoder_context *encoder_context, + void * param) { - gen9_avc_sfd_curbe_data *cmd; + gen8_avc_frame_brc_update_curbe_data *cmd; struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state; struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state; - VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0]; + struct object_surface *obj_surface; + struct gen9_surface_avc *avc_priv_surface; + struct avc_param common_param; - cmd = i965_gpe_context_map_curbe(gpe_context); + obj_surface = encode_state->reconstructed_object; - if (!cmd) + if (!obj_surface || !obj_surface->private_data) return; - memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data)); - - cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ; - cmd->dw0.enable_adaptive_mv_stream_in = 0 ; - cmd->dw0.stream_in_type = 7 ; - cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type] ; - cmd->dw0.brc_mode_enable = generic_state->brc_enabled ; - cmd->dw0.vdenc_mode_disable = 1 ; - - cmd->dw1.hme_stream_in_ref_cost = 5 ; - cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ; - cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ; + avc_priv_surface = obj_surface->private_data; - cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ; - cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ; + cmd = i965_gpe_context_map_curbe(gpe_context); - cmd->dw3.large_mv_threshold = 128 ; - cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ; - cmd->dw5.zmv_threshold = 4 ; - cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60; - cmd->dw7.min_dist_threshold = 10 ; + if (!cmd) + return; - if (generic_state->frame_type == SLICE_TYPE_P) { - memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char)); + memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data)); - } else if (generic_state->frame_type == SLICE_TYPE_B) { - memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char)); + cmd->dw5.target_size_flag = 0 ; + if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) { + /*overflow*/ + generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits; + cmd->dw5.target_size_flag = 1 ; } - cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ; - cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ; - cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ; - cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ; - cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ; - cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ; - cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ; - - i965_gpe_context_unmap_curbe(gpe_context); - -} - -static void -gen9_avc_send_surface_sfd(VADriverContextP ctx, - struct encode_state *encode_state, - struct i965_gpe_context *gpe_context, - struct intel_encoder_context *encoder_context, - void * param) -{ - struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; - struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx; - struct i965_gpe_resource *gpe_resource; - int size = 0; - - /*HME mv data surface memv output 4x*/ - gpe_resource = &avc_ctx->s4x_memv_data_buffer; - i965_add_buffer_2d_gpe_surface(ctx, gpe_context, - gpe_resource, - 1, - I965_SURFACEFORMAT_R8_UNORM, - GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX); - - /* memv distortion */ - gpe_resource = &avc_ctx->s4x_memv_distortion_buffer; - i965_add_buffer_2d_gpe_surface(ctx, gpe_context, - gpe_resource, - 1, - I965_SURFACEFORMAT_R8_UNORM, - GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX); - /*buffer output*/ - size = 32 * 4 * 4; - gpe_resource = &avc_ctx->res_sfd_output_buffer; - i965_add_buffer_gpe_surface(ctx, - gpe_context, - gpe_resource, - 0, - size / 4, - 0, - GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX); + if (generic_state->skip_frame_enbale) { + cmd->dw6.num_skip_frames = generic_state->num_skip_frames ; + cmd->dw7.size_skip_frames = generic_state->size_skip_frames; -} + generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames; -static VAStatus -gen9_avc_kernel_sfd(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct i965_gpe_table *gpe = &i965->gpe_table; - struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; - struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx; - struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx; + } + cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ; + cmd->dw1.frame_number = generic_state->seq_frame_number ; + cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ; + cmd->dw5.cur_frame_type = generic_state->frame_type ; + cmd->dw5.brc_flag = 0 ; + cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ; - struct i965_gpe_context *gpe_context; - struct gpe_media_object_parameter media_object_param; - struct gpe_media_object_inline_data media_object_inline_data; - int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION; - gpe_context = &(avc_ctx->context_sfd.gpe_contexts); + if (avc_state->multi_pre_enable) { + cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ; + cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1 + } - gpe->context_init(ctx, gpe_context); - gpe->reset_binding_table(ctx, gpe_context); + cmd->dw5.max_num_paks = generic_state->num_pak_passes ; + if (avc_state->min_max_qp_enable) { + switch (generic_state->frame_type) { + case SLICE_TYPE_I: + cmd->dw6.minimum_qp = avc_state->min_qp_i ; + cmd->dw6.maximum_qp = avc_state->max_qp_i ; + break; + case SLICE_TYPE_P: + cmd->dw6.minimum_qp = avc_state->min_qp_p ; + cmd->dw6.maximum_qp = avc_state->max_qp_p ; + break; + case SLICE_TYPE_B: + cmd->dw6.minimum_qp = avc_state->min_qp_b ; + cmd->dw6.maximum_qp = avc_state->max_qp_b ; + break; + } + } else { + cmd->dw6.minimum_qp = 0 ; + cmd->dw6.maximum_qp = 0 ; + } - /*set curbe*/ - generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL); + generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame; - /*send surface*/ - generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL); + if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) { + cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150); + cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150); + cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150); + cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150); + cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40))); + cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75))); + cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97))); + cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100))); + cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100))); + cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100))); - gpe->setup_interface_data(ctx, gpe_context); + } - memset(&media_object_param, 0, sizeof(media_object_param)); - memset(&media_object_inline_data, 0, sizeof(media_object_inline_data)); - media_object_param.pinline_data = &media_object_inline_data; - media_object_param.inline_size = sizeof(media_object_inline_data); + memset(&common_param, 0, sizeof(common_param)); + common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel; + common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel; + common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs; + common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs; + common_param.frames_per_100s = generic_state->frames_per_100s; + common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit; + common_param.target_bit_rate = generic_state->target_bit_rate; - gen9_avc_run_kernel_media_object(ctx, encoder_context, - gpe_context, - media_function, - &media_object_param); + i965_gpe_context_unmap_curbe(gpe_context); - return VA_STATUS_SUCCESS; + return; } /* @@ -5657,6 +6616,9 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx, IS_GLK(i965->intel.device_info)) { kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data); kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data); + } else if (IS_GEN8(i965->intel.device_info)) { + kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data); + kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data); } /* 4x scaling kernel*/ @@ -5722,7 +6684,11 @@ gen9_avc_kernel_init_me(VADriverContextP ctx, struct i965_kernel common_kernel; int i = 0; - kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data); + if (IS_GEN8(i965->intel.device_info)) { + kernel_param.curbe_size = sizeof(gen8_avc_me_curbe_data); + } else { + kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data); + } kernel_param.inline_data_size = 0; kernel_param.sampler_size = 0; @@ -5782,6 +6748,9 @@ gen9_avc_kernel_init_mbenc(VADriverContextP ctx, IS_GLK(i965->intel.device_info)) { curbe_size = sizeof(gen95_avc_mbenc_curbe_data); num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC; + } else if (IS_GEN8(i965->intel.device_info)) { + curbe_size = sizeof(gen8_avc_mbenc_curbe_data); + num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC; } assert(curbe_size > 0); @@ -5827,9 +6796,23 @@ gen9_avc_kernel_init_brc(VADriverContextP ctx, struct encoder_kernel_parameter kernel_param ; struct encoder_scoreboard_parameter scoreboard_param; struct i965_kernel common_kernel; + int num_brc_init_kernels = 0; int i = 0; - const int brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = { + if (IS_GEN8(i965->intel.device_info)) { + num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1; + } else { + num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC; + } + + const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = { + (sizeof(gen9_avc_brc_init_reset_curbe_data)), + (sizeof(gen8_avc_frame_brc_update_curbe_data)), + (sizeof(gen9_avc_brc_init_reset_curbe_data)), + (sizeof(gen8_avc_mbenc_curbe_data)), + 0, + }; + const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = { (sizeof(gen9_avc_brc_init_reset_curbe_data)), (sizeof(gen9_avc_frame_brc_update_curbe_data)), (sizeof(gen9_avc_brc_init_reset_curbe_data)), @@ -5847,8 +6830,12 @@ gen9_avc_kernel_init_brc(VADriverContextP ctx, scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard; scoreboard_param.walkpat_flag = 0; - for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++) { - kernel_param.curbe_size = brc_curbe_size[i]; + for (i = 0; i < num_brc_init_kernels; i++) { + if (IS_GEN8(i965->intel.device_info)) { + kernel_param.curbe_size = gen8_brc_curbe_size[i]; + } else { + kernel_param.curbe_size = gen9_brc_curbe_size[i]; + } gpe_context = &kernel_context->gpe_contexts[i]; gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param); gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param); @@ -6240,9 +7227,10 @@ gen9_avc_encode_check_parameter(VADriverContextP ctx, rate_control_mode == VA_RC_CQP)) { generic_state->brc_need_reset = 0;// not support by CQP } - - if (generic_state->brc_need_reset && !avc_state->sfd_mb_enable) { + if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) { avc_state->sfd_enable = 0; + } else { + avc_state->sfd_enable = 1; } if (generic_state->frames_per_window_size == 0) { @@ -6271,6 +7259,7 @@ gen9_avc_encode_check_parameter(VADriverContextP ctx, if (avc_state->caf_supported) { switch (generic_state->frame_type) { case SLICE_TYPE_I: + avc_state->caf_enable = 0; break; case SLICE_TYPE_P: avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01; @@ -6755,6 +7744,40 @@ gen9_avc_vme_context_destroy(void * context) } static void +gen8_avc_kernel_init(VADriverContextP ctx, + struct intel_encoder_context *encoder_context) +{ + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx; + struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx; + int fei_enabled = encoder_context->fei_enabled; + + generic_ctx->get_kernel_header_and_size = fei_enabled ? + intel_avc_fei_get_kernel_header_and_size : + intel_avc_get_kernel_header_and_size ; + gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling); + gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc); + gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me); + gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled); + gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd); + + //function pointer + generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x; + generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x; + generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me; + generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc; + generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset; + generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update; + generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd; + + generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling; + generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me; + generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc; + generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset; + generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update; + generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd; +} +static void gen9_avc_kernel_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { @@ -8523,7 +9546,6 @@ gen9_avc_encode_picture(VADriverContextP ctx, else intel_batchbuffer_start_atomic_bcs(batch, 0x1000); intel_batchbuffer_emit_mi_flush(batch); - for (generic_state->curr_pak_pass = 0; generic_state->curr_pak_pass < generic_state->num_pak_passes; generic_state->curr_pak_pass++) { @@ -8542,7 +9564,6 @@ gen9_avc_encode_picture(VADriverContextP ctx, gen9_avc_pak_picture_level(ctx, encode_state, encoder_context); gen9_avc_pak_slice_level(ctx, encode_state, encoder_context); gen9_avc_read_mfc_status(ctx, encoder_context); - } if (avc_ctx->pres_slice_batch_buffer_2nd_level) { @@ -8684,6 +9705,9 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels; generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels); } + } else if (IS_GEN8(i965->intel.device_info)) { + generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels; + generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels); } else if (IS_KBL(i965->intel.device_info) || IS_GLK(i965->intel.device_info)) { generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels; @@ -8876,9 +9900,12 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en avc_state->lambda_table_enable = 0; - - if (IS_SKL(i965->intel.device_info) || - IS_BXT(i965->intel.device_info)) { + if (IS_GEN8(i965->intel.device_info)) { + avc_state->brc_const_data_surface_width = 64; + avc_state->brc_const_data_surface_height = 44; + avc_state->mb_status_supported = 0; + } else if (IS_SKL(i965->intel.device_info) || + IS_BXT(i965->intel.device_info)) { avc_state->brc_const_data_surface_width = 64; avc_state->brc_const_data_surface_height = 44; avc_state->brc_split_enable = 1; @@ -8926,7 +9953,11 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG; status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG; - gen9_avc_kernel_init(ctx, encoder_context); + if (IS_GEN8(i965->intel.device_info)) { + gen8_avc_kernel_init(ctx, encoder_context); + } else { + gen9_avc_kernel_init(ctx, encoder_context); + } encoder_context->vme_context = vme_context; encoder_context->vme_pipeline = gen9_avc_vme_pipeline; encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;