OSDN Git Service

add init kernel, set curbe, send surface for gen8 avc encoder
author Wang Tiatian <tiantian.wang@intel.com>
Fri, 8 Sep 2017 15:13:26 +0000 (11:13 -0400)
committer Xiang, Haihao <haihao.xiang@intel.com>
Fri, 22 Sep 2017 08:27:58 +0000 (16:27 +0800)
Signed-off-by: Wang Tiatian <tiantian.wang@intel.com>
src/gen9_avc_encoder.c

index c465a35..5958110 100644 (file)
@@ -260,6 +260,138 @@ static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_in
     }
 };
 
+static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = {
+    /* Initializer laid out as one brace group per "unsigned int" slot, numbered
+     * 0 through 17 by the comments below. Groups listing several values
+     * presumably fill sub-fields that share one slot; the struct layout is
+     * declared elsewhere -- confirm against that declaration.
+     * unsigned int 0 */
+    {
+        0
+    },
+
+    //unsigned int 1
+    {
+        0
+    },
+
+    //unsigned int 2
+    {
+        0
+    },
+
+    //unsigned int 3
+    {
+
+        10,
+        50
+
+    },
+
+    //unsigned int 4
+    {
+
+        100,
+        150
+
+    },
+
+    //unsigned int 5
+    {
+        0, 0, 0, 0
+    },
+
+    //unsigned int 6
+    {
+        0, 0, 0, 0
+    },
+
+    //unsigned int 7
+    {
+        0
+    },
+
+    //unsigned int 8
+    {
+
+        1,
+        1,
+        3,
+        2
+
+    },
+
+    //unsigned int 9
+    {
+
+        1,
+        40,
+        5,
+        5
+
+    },
+
+    //unsigned int 10
+    {
+
+        3,
+        1,
+        7,
+        18
+
+    },
+
+    //unsigned int 11
+    {
+
+        25,
+        37,
+        40,
+        75
+
+    },
+
+    //unsigned int 12
+    {
+
+        97,
+        103,
+        125,
+        160
+
+    },
+
+    //unsigned int 13
+    {
+
+        -3, /* NOTE(review): negative initializers in this group; assumes the target fields are signed -- confirm */
+        -2,
+        -1,
+        0
+
+    },
+
+    //unsigned int 14
+    {
+
+        1,
+        2,
+        3,
+        0xff
+
+    },
+
+    //unsigned int 15
+    {
+        0, 0
+    },
+
+    //unsigned int 16
+    {
+        0, 0
+    },
+
+    //unsigned int 17
+    {
+        0, 0
+    },
+};
 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
     // unsigned int 0
     {
@@ -997,6 +1129,7 @@ gen9_avc_allocate_resources(VADriverContextP ctx,
                                                       "brc const data buffer");
         if (!allocate_flag)
             goto failed_allocation;
+        i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
 
         if (generic_state->brc_distortion_buffer_supported) {
             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
@@ -1104,6 +1237,7 @@ gen9_avc_allocate_resources(VADriverContextP ctx,
                                                    "sfd output buffer");
         if (!allocate_flag)
             goto failed_allocation;
+        i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer);
 
         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
         size = ALIGN(52, 64);
@@ -1526,6 +1660,7 @@ gen9_avc_send_surface_scaling(VADriverContextP ctx,
                               void *param)
 {
     struct scaling_param *surface_param = (struct scaling_param *)param;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     unsigned int surface_format;
     unsigned int res_size;
 
@@ -1547,24 +1682,44 @@ gen9_avc_send_surface_scaling(VADriverContextP ctx,
                             GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
 
     /*add buffer mv_proc_stat, here need change*/
-    if (surface_param->mbv_proc_stat_enabled) {
-        res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
+    if (IS_GEN8(i965->intel.device_info)) {
+        if (surface_param->mbv_proc_stat_enabled) {
+            res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
 
-        i965_add_buffer_gpe_surface(ctx,
-                                    gpe_context,
-                                    surface_param->pres_mbv_proc_stat_buffer,
-                                    0,
-                                    res_size / 4,
-                                    0,
-                                    GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
-    } else if (surface_param->enable_mb_flatness_check) {
-        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
-                                       surface_param->pres_flatness_check_surface,
-                                       1,
-                                       I965_SURFACEFORMAT_R8_UNORM,
-                                       GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
-    }
+            i965_add_buffer_gpe_surface(ctx,
+                                        gpe_context,
+                                        surface_param->pres_mbv_proc_stat_buffer,
+                                        0,
+                                        res_size / 4,
+                                        0,
+                                        GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM);
+        }
+        if (surface_param->enable_mb_flatness_check) {
+            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                           surface_param->pres_flatness_check_surface,
+                                           1,
+                                           I965_SURFACEFORMAT_R8_UNORM,
+                                           GEN8_SCALING_FRAME_FLATNESS_DST_CM);
+        }
+    } else {
+        if (surface_param->mbv_proc_stat_enabled) {
+            res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
 
+            i965_add_buffer_gpe_surface(ctx,
+                                        gpe_context,
+                                        surface_param->pres_mbv_proc_stat_buffer,
+                                        0,
+                                        res_size / 4,
+                                        0,
+                                        GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
+        } else if (surface_param->enable_mb_flatness_check) {
+            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                           surface_param->pres_flatness_check_surface,
+                                           1,
+                                           I965_SURFACEFORMAT_R8_UNORM,
+                                           GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
+        }
+    }
     return;
 }
 
@@ -1827,6 +1982,95 @@ gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
     /* set paramters DW19/DW20 for slices */
 }
 
+static void
+gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context,
+                                struct gen8_mfx_avc_img_state *pstate)
+{
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    /*
+     * Purpose: zero *pstate and then fill its command header (dw0) and the
+     * fields of dw1-dw6 and dw8-dw11. Values come from the frame size kept in
+     * generic_state, from the sequence/picture parameter buffers referenced by
+     * avc_state, and from fixed constants. dw7 is left at zero.
+     * ctx and encode_state are not referenced in this function.
+     */
+    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
+    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
+
+    memset(pstate, 0, sizeof(*pstate));
+    /* dword_length: structure size in 4-byte units, minus 2 */
+    pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2;
+    pstate->dw0.command_sub_opcode_b = 0;
+    pstate->dw0.command_sub_opcode_a = 0;
+    pstate->dw0.command_opcode = 1;
+    pstate->dw0.command_pipeline = 2;
+    pstate->dw0.command_type = 3;
+    /* frame dimensions, expressed in macroblocks */
+    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;
+
+    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
+    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;
+    /* dw3: prediction weighting and chroma QP offsets are copied from pic_param */
+    pstate->dw3.image_structure = 0;//frame is zero
+    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
+    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
+    pstate->dw3.inter_mb_conf_flag = 0;
+    pstate->dw3.intra_mb_conf_flag = 0;
+    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
+    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
+    /* dw4: coding-tool flags copied from seq_param/pic_param, plus fixed settings */
+    pstate->dw4.field_picture_flag = 0;
+    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
+    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
+    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
+    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
+    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
+    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
+    pstate->dw4.mb_mv_format_flag = 1;
+    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
+    pstate->dw4.mv_unpacked_flag = 1;
+    pstate->dw4.insert_test_flag = 0;
+    pstate->dw4.load_slice_pointer_flag = 0;
+    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
+    pstate->dw4.minimum_frame_size = 0;
+    pstate->dw5.intra_mb_max_bit_flag = 1;
+    pstate->dw5.inter_mb_max_bit_flag = 1;
+    pstate->dw5.frame_size_over_flag = 1;
+    pstate->dw5.frame_size_under_flag = 1;
+    pstate->dw5.intra_mb_ipcm_flag = 1;
+    pstate->dw5.mb_rate_ctrl_flag = 0;
+    pstate->dw5.non_first_pass_flag = 0;
+    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
+    pstate->dw5.aq_chroma_disable = 1;
+    /* aq_enable/aq_rounding take the tq_* values only when entropy_coding_flag and tq_enable are both non-zero */
+    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
+        pstate->dw5.aq_enable = avc_state->tq_enable;
+        pstate->dw5.aq_rounding = avc_state->tq_rounding;
+    } else { /* aq_rounding was already set to 0 above; this re-assigns the same value */
+        pstate->dw5.aq_rounding = 0;
+    }
+
+    pstate->dw6.intra_mb_max_size = 2700;
+    pstate->dw6.inter_mb_max_size = 4095;
+
+    pstate->dw8.slice_delta_qp_max0 = 0;
+    pstate->dw8.slice_delta_qp_max1 = 0;
+    pstate->dw8.slice_delta_qp_max2 = 0;
+    pstate->dw8.slice_delta_qp_max3 = 0;
+
+    pstate->dw9.slice_delta_qp_min0 = 0;
+    pstate->dw9.slice_delta_qp_min1 = 0;
+    pstate->dw9.slice_delta_qp_min2 = 0;
+    pstate->dw9.slice_delta_qp_min3 = 0;
+
+    pstate->dw10.frame_bitrate_min = 0;
+    pstate->dw10.frame_bitrate_min_unit = 1;
+    pstate->dw10.frame_bitrate_min_unit_mode = 1;
+    pstate->dw10.frame_bitrate_max = (1 << 14) - 1; /* 0x3fff */
+    pstate->dw10.frame_bitrate_max_unit = 1;
+    pstate->dw10.frame_bitrate_max_unit_mode = 1;
+
+    pstate->dw11.frame_bitrate_min_delta = 0;
+    pstate->dw11.frame_bitrate_max_delta = 0;
+    /* set parameters DW19/DW20 for slices -- NOTE(review): nothing in this function writes them; presumably handled elsewhere, confirm */
+}
 void gen9_avc_set_image_state(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
@@ -1867,6 +2111,47 @@ void gen9_avc_set_image_state(VADriverContextP ctx,
     return;
 }
 
+void gen8_avc_set_image_state(VADriverContextP ctx,
+                              struct encode_state *encode_state,
+                              struct intel_encoder_context *encoder_context,
+                              struct i965_gpe_resource *gpe_resource)
+{
+    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
+    char *pdata;
+    int i;
+    unsigned int * data;
+    struct gen8_mfx_avc_img_state cmd;
+    /*
+     * Purpose: map gpe_resource and write one gen8 image-state command per PAK
+     * pass (generic_state->num_pak_passes of them). Each command is followed by
+     * an MI_BATCH_BUFFER_END dword, and consecutive commands are placed
+     * INTEL_AVC_IMAGE_STATE_CMD_SIZE bytes apart.
+     * Assumes that stride is at least sizeof(cmd) + 4 and that the resource is
+     * large enough for all passes -- TODO confirm against the allocation site.
+     */
+    pdata = i965_map_gpe_resource(gpe_resource);
+    /* mapping returned NULL: leave the resource untouched */
+    if (!pdata)
+        return;
+
+    gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
+    for (i = 0; i < generic_state->num_pak_passes; i++) {
+        /* pass 0 keeps MB statistics and non_first_pass_flag off; every later pass turns them on together with the inter/intra_mb_conf flags */
+        if (i == 0) {
+            cmd.dw4.macroblock_stat_enable = 0;
+            cmd.dw5.non_first_pass_flag = 0;
+        } else {
+            cmd.dw4.macroblock_stat_enable = 1;
+            cmd.dw5.non_first_pass_flag = 1;
+            cmd.dw5.intra_mb_ipcm_flag = 1;
+            cmd.dw3.inter_mb_conf_flag = 1;
+            cmd.dw3.intra_mb_conf_flag = 1;
+        }
+        cmd.dw5.mb_rate_ctrl_flag = 0;
+        memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state));
+        data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state)); /* dword immediately after the copied command */
+        *data = MI_BATCH_BUFFER_END;
+        /* step to the slot used by the next pass */
+        pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
+    }
+    i965_unmap_gpe_resource(gpe_resource);
+    return;
+}
+
 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
                                       struct encode_state *encode_state,
                                       struct intel_encoder_context *encoder_context,
@@ -2576,7 +2861,8 @@ gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
     unsigned char is_g95 = 0;
 
     if (IS_SKL(i965->intel.device_info) ||
-        IS_BXT(i965->intel.device_info))
+        IS_BXT(i965->intel.device_info) ||
+        IS_GEN8(i965->intel.device_info))
         is_g95 = 0;
     else if (IS_KBL(i965->intel.device_info) ||
              IS_GLK(i965->intel.device_info))
@@ -2662,14 +2948,22 @@ gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
 
     /* MB statistical data surface*/
-    i965_add_buffer_gpe_surface(ctx,
-                                gpe_context,
-                                &avc_ctx->res_mb_status_buffer,
-                                0,
-                                avc_ctx->res_mb_status_buffer.size,
-                                0,
-                                (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
-
+    if (!IS_GEN8(i965->intel.device_info)) {
+        i965_add_buffer_gpe_surface(ctx,
+                                    gpe_context,
+                                    &avc_ctx->res_mb_status_buffer,
+                                    0,
+                                    avc_ctx->res_mb_status_buffer.size,
+                                    0,
+                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
+    } else {
+        i965_add_buffer_2d_gpe_surface(ctx,
+                                       gpe_context,
+                                       &avc_ctx->res_mbbrc_mb_qp_data_surface,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX);
+    }
     return;
 }
 
@@ -2774,7 +3068,11 @@ gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
         gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
     }
     /* image state construct*/
-    gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
+    if (IS_GEN8(i965->intel.device_info)) {
+        gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
+    } else {
+        gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
+    }
     /* set surface frame mbenc*/
     generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
 
@@ -3281,7 +3579,6 @@ gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
         {
             memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
-                //cmd.g9->dw8 = gen9_avc_old_intra_mode_cost[qp];
             } else if (avc_state->skip_bias_adjustment_enable) {
                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
                 // No need to check for P picture as the flag is only enabled for P picture */
@@ -4199,19 +4496,35 @@ gen9_avc_send_surface_mbenc(VADriverContextP ctx,
 
     if (generic_state->hme_enabled) {
         /*memv input 4x*/
-        gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
-        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
-                                       gpe_resource,
-                                       1,
-                                       I965_SURFACEFORMAT_R8_UNORM,
-                                       GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
-        /* memv distortion input*/
-        gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
-        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
-                                       gpe_resource,
-                                       1,
-                                       I965_SURFACEFORMAT_R8_UNORM,
-                                       GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
+        if (!IS_GEN8(i965->intel.device_info)) {
+            gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
+            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                           gpe_resource,
+                                           1,
+                                           I965_SURFACEFORMAT_R8_UNORM,
+                                           GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
+            /* memv distortion input*/
+            gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
+            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                           gpe_resource,
+                                           1,
+                                           I965_SURFACEFORMAT_R8_UNORM,
+                                           GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
+        } else if (generic_state->frame_type != SLICE_TYPE_I) {
+            gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
+            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                           gpe_resource,
+                                           1,
+                                           I965_SURFACEFORMAT_R8_UNORM,
+                                           GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
+            /* memv distortion input*/
+            gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
+            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                           gpe_resource,
+                                           1,
+                                           I965_SURFACEFORMAT_R8_UNORM,
+                                           GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
+        }
     }
 
     /*mbbrc const data_buffer*/
@@ -4348,21 +4661,28 @@ gen9_avc_send_surface_mbenc(VADriverContextP ctx,
                                        GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
 
     }
+    if (!IS_GEN8(i965->intel.device_info)) {
+        if (param->mb_vproc_stats_enable) {
+            /*mb status buffer input*/
+            size = frame_mb_size * 16 * 4;
+            gpe_resource = &(avc_ctx->res_mb_status_buffer);
+            i965_add_buffer_gpe_surface(ctx,
+                                        gpe_context,
+                                        gpe_resource,
+                                        0,
+                                        size / 4,
+                                        0,
+                                        GEN9_AVC_MBENC_MB_STATS_INDEX);
 
-    if (param->mb_vproc_stats_enable) {
-        /*mb status buffer input*/
-        size = frame_mb_size * 16 * 4;
-        gpe_resource = &(avc_ctx->res_mb_status_buffer);
-        i965_add_buffer_gpe_surface(ctx,
-                                    gpe_context,
-                                    gpe_resource,
-                                    0,
-                                    size / 4,
-                                    0,
-                                    GEN9_AVC_MBENC_MB_STATS_INDEX);
-
+        } else if (avc_state->flatness_check_enable) {
+            gpe_resource = &(avc_ctx->res_flatness_check_surface);
+            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                           gpe_resource,
+                                           1,
+                                           I965_SURFACEFORMAT_R8_UNORM,
+                                           GEN9_AVC_MBENC_MB_STATS_INDEX);
+        }
     } else if (avc_state->flatness_check_enable) {
-
         gpe_resource = &(avc_ctx->res_flatness_check_surface);
         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                        gpe_resource,
@@ -4420,25 +4740,38 @@ gen9_avc_send_surface_mbenc(VADriverContextP ctx,
                                            I965_SURFACEFORMAT_R8_UNORM,
                                            (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
         }
+        if (IS_GEN8(i965->intel.device_info)) {
+            if (avc_state->sfd_enable) {
+                size = 128 / sizeof(unsigned long);
+                gpe_resource = &(avc_ctx->res_sfd_output_buffer);
+                i965_add_buffer_gpe_surface(ctx,
+                                            gpe_context,
+                                            gpe_resource,
+                                            0,
+                                            size / 4,
+                                            0,
+                                            GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM);
 
-        if (avc_state->sfd_enable && generic_state->hme_enabled) {
-            if (generic_state->frame_type == SLICE_TYPE_P) {
-                gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
-
-            } else if (generic_state->frame_type == SLICE_TYPE_B) {
-                gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
             }
+        } else {
+            if (avc_state->sfd_enable && generic_state->hme_enabled) {
+                if (generic_state->frame_type == SLICE_TYPE_P) {
+                    gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
+                } else if (generic_state->frame_type == SLICE_TYPE_B) {
+                    gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
+                }
+                if (generic_state->frame_type != SLICE_TYPE_I) {
+                    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                                   gpe_resource,
+                                                   1,
+                                                   I965_SURFACEFORMAT_R8_UNORM,
+                                                   (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
 
-            if (generic_state->frame_type != SLICE_TYPE_I) {
-                i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
-                                               gpe_resource,
-                                               1,
-                                               I965_SURFACEFORMAT_R8_UNORM,
-                                               (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
+
+                }
             }
         }
     }
-
     return;
 }
 
@@ -5470,168 +5803,794 @@ gen9_avc_kernel_wp(VADriverContextP ctx,
     /*set curbe*/
     generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
 
-    /*send surface*/
-    generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
+    /*send surface*/
+    generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
+
+    gpe->setup_interface_data(ctx, gpe_context);
+
+    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
+    /* the scaling is based on 8x8 blk level */
+    kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
+    kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
+    kernel_walker_param.no_dependency = 1;
+
+    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
+
+    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
+                                            gpe_context,
+                                            media_function,
+                                            &media_object_walker_param);
+
+    return VA_STATUS_SUCCESS;
+}
+
+
+/*
+SFD (static frame detection) related functions
+*/
+static void
+gen9_avc_set_curbe_sfd(VADriverContextP ctx,
+                       struct encode_state *encode_state,
+                       struct i965_gpe_context *gpe_context,
+                       struct intel_encoder_context *encoder_context,
+                       void * param)
+{
+    gen9_avc_sfd_curbe_data *cmd;
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
+    /*
+     * Purpose: map the CURBE buffer of gpe_context, zero it, and fill it as a
+     * gen9_avc_sfd_curbe_data using the frame size and frame type from
+     * generic_state, the first slice's parameters (slice_param[0]) and the
+     * picture parameters in avc_state. The buffer is unmapped before returning.
+     * ctx, encode_state and param are not referenced in this function.
+     */
+    cmd = i965_gpe_context_map_curbe(gpe_context);
+    /* mapping returned NULL: nothing to fill */
+    if (!cmd)
+        return;
+    memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
+
+    cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
+    cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
+    cmd->dw0.stream_in_type = 7 ;
+    cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
+    cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
+    cmd->dw0.vdenc_mode_disable = 1 ;
+
+    cmd->dw1.hme_stream_in_ref_cost = 5 ;
+    cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
+    cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
+
+    cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
+    cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
+    /* thresholds: the two "total" values are percentages of the macroblock count (1% and zero_mv_threshold%) */
+    cmd->dw3.large_mv_threshold = 128 ;
+    cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
+    cmd->dw5.zmv_threshold = 4 ;
+    cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
+    cmd->dw7.min_dist_threshold = 10 ;
+    /* cost_table is copied only for P and B frames; for any other frame type it stays zero from the memset above */
+    if (generic_state->frame_type == SLICE_TYPE_P) {
+        memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
+
+    } else if (generic_state->frame_type == SLICE_TYPE_B) {
+        memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
+    }
+    /* dw21 repeats the dw2 dimensions; dw24-dw29 carry GEN9_AVC_SFD_* surface index constants */
+    cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
+    cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
+    cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
+    cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
+    cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
+    cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
+    cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
+
+    i965_gpe_context_unmap_curbe(gpe_context);
+
+}
+
+static void
+gen9_avc_send_surface_sfd(VADriverContextP ctx,
+                          struct encode_state *encode_state,
+                          struct i965_gpe_context *gpe_context,
+                          struct intel_encoder_context *encoder_context,
+                          void * param)
+{
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+    struct i965_gpe_resource *gpe_resource;
+    int size = 0;
+    /*
+     * Purpose: add three surfaces to gpe_context at the GEN9_AVC_SFD_* indices:
+     * s4x_memv_data_buffer and s4x_memv_distortion_buffer as 2D R8_UNORM
+     * surfaces, and res_sfd_output_buffer as a buffer surface.
+     * encode_state and param are not referenced in this function.
+     */
+    /*HME mv data surface memv output 4x*/
+    gpe_resource = &avc_ctx->s4x_memv_data_buffer;
+    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                   gpe_resource,
+                                   1,
+                                   I965_SURFACEFORMAT_R8_UNORM,
+                                   GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
+
+    /* memv distortion */
+    gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
+    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                   gpe_resource,
+                                   1,
+                                   I965_SURFACEFORMAT_R8_UNORM,
+                                   GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
+    /* SFD output buffer */
+    size = 32 * 4 * 4; /* 512; handed to the helper below as size / 4 = 128 */
+    gpe_resource = &avc_ctx->res_sfd_output_buffer;
+    i965_add_buffer_gpe_surface(ctx,
+                                gpe_context,
+                                gpe_resource,
+                                0,
+                                size / 4,
+                                0,
+                                GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
+
+}
+
+static VAStatus
+gen9_avc_kernel_sfd(VADriverContextP ctx,
+                    struct encode_state *encode_state,
+                    struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_gpe_table *gpe = &i965->gpe_table;
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
+    /*
+     * Purpose: prepare the SFD GPE context (avc_ctx->context_sfd.gpe_contexts)
+     * and submit it through gen9_avc_run_kernel_media_object with media
+     * function INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION. CURBE and surfaces are
+     * set up through the pfn_set_curbe_sfd / pfn_send_sfd_surface callbacks of
+     * generic_ctx, both called with a NULL param.
+     * Returns VA_STATUS_SUCCESS unconditionally; no callee result is checked.
+     */
+    struct i965_gpe_context *gpe_context;
+    struct gpe_media_object_parameter media_object_param;
+    struct gpe_media_object_inline_data media_object_inline_data;
+    int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
+    gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
+
+    gpe->context_init(ctx, gpe_context);
+    gpe->reset_binding_table(ctx, gpe_context);
+
+    /*set curbe*/
+    generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
+
+    /*send surface*/
+    generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
+
+    gpe->setup_interface_data(ctx, gpe_context);
+    /* a single media object whose inline data is all zeroes */
+    memset(&media_object_param, 0, sizeof(media_object_param));
+    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
+    media_object_param.pinline_data = &media_object_inline_data;
+    media_object_param.inline_size = sizeof(media_object_inline_data);
+
+    gen9_avc_run_kernel_media_object(ctx, encoder_context,
+                                     gpe_context,
+                                     media_function,
+                                     &media_object_param);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Fill the CURBE of the gen8 AVC MBEnc kernel.
+ *
+ * The mapped CURBE is first seeded from one of the gen8 MBEnc init tables
+ * (the I-frame-distortion table, or the normal I/P/B table selected by
+ * generic_state->frame_type) and is then patched with values taken from the
+ * encoder state, the picture/slice parameters and the preset tables.
+ *
+ * param must point to a struct mbenc_param; its mbenc_i_frame_dist_in_use
+ * and roi_enabled members are read here.
+ *
+ * The function returns without reporting an error when the CURBE cannot be
+ * mapped, when the device is not gen8, or when the backward reference of a
+ * B frame cannot be resolved.
+ */
+static void
+gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
+                         struct encode_state *encode_state,
+                         struct i965_gpe_context *gpe_context,
+                         struct intel_encoder_context *encoder_context,
+                         void * param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    gen8_avc_mbenc_curbe_data *cmd;
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+
+    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
+    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
+    VASurfaceID surface_id;
+    struct object_surface *obj_surface;
+
+    struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
+    unsigned char qp = 0;
+    unsigned char me_method = 0;
+    unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
+    unsigned int table_idx = 0;
+    unsigned int curbe_size = 0;
+
+    unsigned int preset = generic_state->preset;
+    if (IS_GEN8(i965->intel.device_info)) {
+        cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
+        if (!cmd)
+            return;
+        curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
+        memset(cmd, 0, curbe_size);
+
+        /* Seed the CURBE from the init table matching this kernel variant. */
+        if (mbenc_i_frame_dist_in_use) {
+            memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
+        } else {
+            switch (generic_state->frame_type) {
+            case SLICE_TYPE_I:
+                memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
+                break;
+            case SLICE_TYPE_P:
+                memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
+                break;
+            case SLICE_TYPE_B:
+                memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
+                break;
+            default:
+                assert(0);
+            }
+        }
+    } else {
+        /* Only gen8 is handled by this function. */
+        assert(0);
+
+        return;
+    }
+
+    me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
+    /* Slice QP of the first slice; used below as an index into the QP tables. */
+    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+
+    cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
+    cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
+    cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
+    cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
+
+    cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
+    cmd->dw38.max_len_sp = 0;
+
+    cmd->dw3.src_access = 0;
+    cmd->dw3.ref_access = 0;
+
+    /* FTQ is only considered for non-I frames. */
+    if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
+        //disable ftq_override by now.
+        if (avc_state->ftq_override) {
+            cmd->dw3.ftq_enable = avc_state->ftq_enable;
+
+        } else {
+            /* Bit 0 of the preset entry applies to P frames, bit 1 to B frames. */
+            if (generic_state->frame_type == SLICE_TYPE_P) {
+                cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
+
+            } else {
+                cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
+            }
+        }
+    } else {
+        cmd->dw3.ftq_enable = 0;
+    }
+
+    if (avc_state->disable_sub_mb_partion)
+        cmd->dw3.sub_mb_part_mask = 0x7;
+
+    if (mbenc_i_frame_dist_in_use) {
+        /* I-frame distortion pass: dimensions come from the 4x downscaled frame. */
+        cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
+        cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
+        cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
+        cmd->dw6.batch_buffer_end = 0;
+        cmd->dw31.intra_compute_type = 1;
+    } else {
+        cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
+        cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
+        cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
+
+        {
+            /* dw8..dw15 receive the mode/MV cost row for this slice type and QP. */
+            memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
+            if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
+            } else if (avc_state->skip_bias_adjustment_enable) {
+                /* Load a different MvCost for P pictures when SkipBiasAdjustment
+                 * is enabled. No need to check for a P picture here as the flag
+                 * is only enabled for P pictures. */
+                cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
+            }
+        }
+        /* dw16..dw31 receive the search path selected by frame type and ME method. */
+        table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
+        memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
+    }
+    cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
+    cmd->dw4.field_parity_flag = 0;//bottom field
+    cmd->dw4.enable_cur_fld_idr = 0;//field related
+    cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
+    cmd->dw4.hme_enable = generic_state->hme_enabled;
+    cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
+    cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
+
+    cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
+    cmd->dw7.src_field_polarity = 0;//field related
+
+    /*ftq_skip_threshold_lut set,dw14 /15*/
+
+    /*r5 disable NonFTQSkipThresholdLUT*/
+    if (generic_state->frame_type == SLICE_TYPE_P) {
+        cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
+    } else if (generic_state->frame_type == SLICE_TYPE_B) {
+        cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
+    }
+
+    cmd->dw13.qp_prime_y = qp;
+    cmd->dw13.qp_prime_cb = qp;
+    cmd->dw13.qp_prime_cr = qp;
+    cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
+
+    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
+        /* The preset selects for which frame types multi-prediction is enabled per list. */
+        switch (gen9_avc_multi_pred[preset]) {
+        case 0:
+            cmd->dw32.mult_pred_l0_disable = 128;
+            cmd->dw32.mult_pred_l1_disable = 128;
+            break;
+        case 1:
+            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
+            cmd->dw32.mult_pred_l1_disable = 128;
+            break;
+        case 2:
+            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
+            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
+            break;
+        case 3:
+            cmd->dw32.mult_pred_l0_disable = 1;
+            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
+            break;
+        }
+
+    } else {
+        cmd->dw32.mult_pred_l0_disable = 128;
+        cmd->dw32.mult_pred_l1_disable = 128;
+    }
+
+    if (generic_state->frame_type == SLICE_TYPE_B) {
+        /* NOTE(review): the same field is assigned twice below; the second
+         * assignment may have been intended for a different field - confirm
+         * against the gen8 MBEnc CURBE layout. */
+        cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
+        cmd->dw34.list1_ref_id0_frm_field_parity = 0;
+        cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
+    }
+
+    cmd->dw34.b_original_bff = 0; //frame only
+    cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
+    cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
+    cmd->dw34.mad_enable_falg = avc_state->mad_enable;
+    cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
+    cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
+    cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
+
+    if (cmd->dw34.force_non_skip_check) {
+        cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
+    }
+
+    cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
+    cmd->dw38.ref_threshold = 400;
+    cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
+    cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;
+
+    if (mbenc_i_frame_dist_in_use) {
+        /* The I-frame distortion pass runs with QP and non-DC penalties cleared. */
+        cmd->dw13.qp_prime_y = 0;
+        cmd->dw13.qp_prime_cb = 0;
+        cmd->dw13.qp_prime_cr = 0;
+        cmd->dw33.intra_16x16_nondc_penalty = 0;
+        cmd->dw33.intra_8x8_nondc_penalty = 0;
+        cmd->dw33.intra_4x4_nondc_penalty = 0;
+    }
+    if (cmd->dw4.use_actual_ref_qp_value) {
+        /* Per-reference QP: eight entries for list 0, two for list 1. */
+        cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
+        cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
+        cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
+        cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
+        cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
+        cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
+        cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
+        cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
+        cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
+        cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
+    }
+
+    table_idx = slice_type_kernel[generic_state->frame_type];
+    cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp];
+    /* Frame-type specific settings. */
+    if (generic_state->frame_type == SLICE_TYPE_I) {
+        cmd->dw0.skip_mode_enable = 0;
+        cmd->dw37.skip_mode_enable = 0;
+        cmd->dw36.hme_combine_overlap = 0;
+        cmd->dw47.intra_cost_sf = 16;
+        cmd->dw34.enable_direct_bias_adjustment = 0;
+        cmd->dw34.enable_global_motion_bias_adjustment = 0;
+
+    } else if (generic_state->frame_type == SLICE_TYPE_P) {
+        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
+        cmd->dw3.bme_disable_fbr = 1;
+        cmd->dw5.ref_width = gen9_avc_search_x[preset];
+        cmd->dw5.ref_height = gen9_avc_search_y[preset];
+        cmd->dw7.non_skip_zmv_added = 1;
+        cmd->dw7.non_skip_mode_added = 1;
+        cmd->dw7.skip_center_mask = 1;
+        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
+        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
+        cmd->dw36.hme_combine_overlap = 1;
+        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
+        cmd->dw39.ref_width = gen9_avc_search_x[preset];
+        cmd->dw39.ref_height = gen9_avc_search_y[preset];
+        cmd->dw34.enable_direct_bias_adjustment = 0;
+        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
+        if (avc_state->global_motion_bias_adjustment_enable)
+            cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
+    } else {
+        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
+        cmd->dw1.bi_weight = avc_state->bi_weight;
+        cmd->dw3.search_ctrl = 7;
+        cmd->dw3.skip_type = 1;
+        cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
+        cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
+        cmd->dw7.skip_center_mask = 0xff;
+        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
+        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
+        cmd->dw36.hme_combine_overlap = 1;
+        surface_id = slice_param->RefPicList1[0].picture_id;
+        obj_surface = SURFACE(surface_id);
+        if (!obj_surface) {
+            WARN_ONCE("Invalid backward reference frame\n");
+            /* Release the CURBE mapping taken above before bailing out. */
+            i965_gpe_context_unmap_curbe(gpe_context);
+            return;
+        }
+        cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
+        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
+        cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
+        cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
+        cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
+        cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
+        cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
+        cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
+        cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
+        cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
+        cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
+        cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
+        cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
+        cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
+        if (cmd->dw34.enable_direct_bias_adjustment) {
+            cmd->dw7.non_skip_zmv_added = 1;
+            cmd->dw7.non_skip_mode_added = 1;
+        }
+
+        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
+        if (avc_state->global_motion_bias_adjustment_enable)
+            cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
+    }
+    /* Write back the block-based-skip setting found in the CURBE. */
+    avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
+
+    if (avc_state->rolling_intra_refresh_enable) {
+        /*by now disable it*/
+        if (generic_state->brc_enabled) {
+            cmd->dw4.enable_intra_refresh = false;
+            cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
+            cmd->dw48.widi_intra_refresh_mbx = 0;
+            cmd->dw58.widi_intra_refresh_mby = 0;
+        } else {
+            cmd->dw4.enable_intra_refresh = true;
+            cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
+        }
+        cmd->dw32.mult_pred_l0_disable = 128;
+        /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
+         across one P frame to another P frame, as needed by the RollingI algo */
+        cmd->dw48.widi_intra_refresh_mbx = 0;
+        cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
+        cmd->dw48.widi_intra_refresh_qp_delta = 0;
+
+    } else {
+        cmd->dw34.widi_intra_refresh_en = 0;
+    }
+
+    /*roi set disable by now. 49-56*/
+    if (curbe_param->roi_enabled) {
+        cmd->dw49.roi_1_x_left   = generic_state->roi[0].left;
+        cmd->dw49.roi_1_y_top    = generic_state->roi[0].top;
+        cmd->dw50.roi_1_x_right  = generic_state->roi[0].right;
+        cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
+
+        cmd->dw51.roi_2_x_left   = generic_state->roi[1].left;
+        cmd->dw51.roi_2_y_top    = generic_state->roi[1].top;
+        cmd->dw52.roi_2_x_right  = generic_state->roi[1].right;
+        cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
+
+        cmd->dw53.roi_3_x_left   = generic_state->roi[2].left;
+        cmd->dw53.roi_3_y_top    = generic_state->roi[2].top;
+        cmd->dw54.roi_3_x_right  = generic_state->roi[2].right;
+        cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
+
+        cmd->dw55.roi_4_x_left   = generic_state->roi[3].left;
+        cmd->dw55.roi_4_y_top    = generic_state->roi[3].top;
+        cmd->dw56.roi_4_x_right  = generic_state->roi[3].right;
+        cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
+
+        cmd->dw36.enable_cabac_work_around = 0;
+
+        if (!generic_state->brc_enabled) {
+            /* Without BRC each ROI delta QP is clipped to [-qp, AVC_QP_MAX - qp]. */
+            char tmp = 0;
+            tmp = generic_state->roi[0].value;
+            CLIP(tmp, -qp, AVC_QP_MAX - qp);
+            cmd->dw57.roi_1_dqp_prime_y = tmp;
+            tmp = generic_state->roi[1].value;
+            CLIP(tmp, -qp, AVC_QP_MAX - qp);
+            cmd->dw57.roi_2_dqp_prime_y = tmp;
+            tmp = generic_state->roi[2].value;
+            CLIP(tmp, -qp, AVC_QP_MAX - qp);
+            cmd->dw57.roi_3_dqp_prime_y = tmp;
+            tmp = generic_state->roi[3].value;
+            CLIP(tmp, -qp, AVC_QP_MAX - qp);
+            cmd->dw57.roi_4_dqp_prime_y = tmp;
+        } else {
+            cmd->dw34.roi_enable_flag = 0;
+        }
+    }
+
+    /* Surface indices (dw65..dw86) used by the gen8 MBEnc kernel. */
+    cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM;
+    cmd->dw66.mv_data_surf_index =  GEN8_AVC_MBENC_IND_MV_DATA_CM;
+    cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM;
+    cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM;
+    cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM;
+    cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM;
+    cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM;
+    cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM;
+    cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM;
+    cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM;
+    cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM;
+    cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM;
+    cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM;
+    cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM;
+    cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM;
+    cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM;
+    cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM;
+    cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM;
+    cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM;
+    cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM;
+    cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM;
+    cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM;
+
+    i965_gpe_context_unmap_curbe(gpe_context);
+
+    return;
+}
+
+/*
+ * Fill the CURBE of the gen8 4x scaling kernel.
+ *
+ * param must point to a struct scaling_param; its input frame dimensions and
+ * the flatness-check / variance / pixel-average enable flags are copied into
+ * the CURBE. Returns without doing anything if the CURBE cannot be mapped.
+ */
+static void
+gen8_avc_set_curbe_scaling4x(VADriverContextP ctx,
+                             struct encode_state *encode_state,
+                             struct i965_gpe_context *gpe_context,
+                             struct intel_encoder_context *encoder_context,
+                             void *param)
+{
+    gen8_avc_scaling4x_curbe_data *curbe_cmd;
+    struct scaling_param *surface_param = (struct scaling_param *)param;
+
+    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
+
+    if (!curbe_cmd)
+        return;
+
+    /* Size the clear from the mapped gen8 layout itself rather than from the
+     * gen9 structure, so the memset always matches the object being written. */
+    memset(curbe_cmd, 0, sizeof(*curbe_cmd));
+
+    curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
+    curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
+
+    curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM;
+    curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM;
+
+    /* The flatness threshold and its output binding are only set when the
+     * flatness check is requested. */
+    curbe_cmd->dw5.flatness_threshold = 0;
+    if (surface_param->enable_mb_flatness_check) {
+        curbe_cmd->dw5.flatness_threshold = 128;
+        curbe_cmd->dw8.flatness_output_bti_top_field = 4;
+    }
+
+    curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
+    curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
+    curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
+
+    /* The MBV proc stats bindings are needed by either statistics output. */
+    if (curbe_cmd->dw6.enable_mb_variance_output ||
+        curbe_cmd->dw6.enable_mb_pixel_average_output) {
+        curbe_cmd->dw10.mbv_proc_states_bti_top_field  = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM;
+        curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM;
+    }
+
+    i965_gpe_context_unmap_curbe(gpe_context);
+    return;
+}
+
+/*
+ * Fill the CURBE of the gen8 AVC HME (motion estimation) kernel.
+ *
+ * param must point to a struct me_param; its hme_type selects the 4x, 16x or
+ * 32x pass, which determines the scale factor, the MV shift factor and
+ * whether MVs from the previous step are consumed and distortions written.
+ *
+ * Returns without touching the CURBE when hme_type is not one of the three
+ * known values or when the CURBE cannot be mapped.
+ */
+static void
+gen8_avc_set_curbe_me(VADriverContextP ctx,
+                      struct encode_state *encode_state,
+                      struct i965_gpe_context *gpe_context,
+                      struct intel_encoder_context *encoder_context,
+                      void * param)
+{
+    gen8_avc_me_curbe_data *curbe_cmd;
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+
+    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
+
+    struct me_param * curbe_param = (struct me_param *)param ;
+    unsigned char  use_mv_from_prev_step = 0;
+    unsigned char write_distortions = 0;
+    unsigned char qp_prime_y = 0;
+    unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
+    unsigned char seach_table_idx = 0;
+    unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
+    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
+    unsigned int scale_factor = 0;
+
+    qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
+    switch (curbe_param->hme_type) {
+    case INTEL_ENC_HME_4x : {
+        use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
+        write_distortions = 1;
+        mv_shift_factor = 2;
+        scale_factor = 4;
+        prev_mv_read_pos_factor = 0;
+        break;
+    }
+    case INTEL_ENC_HME_16x : {
+        use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
+        write_distortions = 0;
+        mv_shift_factor = 2;
+        scale_factor = 16;
+        prev_mv_read_pos_factor = 1;
+        break;
+    }
+    case INTEL_ENC_HME_32x : {
+        use_mv_from_prev_step = 0;
+        write_distortions = 0;
+        mv_shift_factor = 1;
+        scale_factor = 32;
+        prev_mv_read_pos_factor = 0;
+        break;
+    }
+    default:
+        assert(0);
+        /* When asserts are compiled out, scale_factor would still be 0 and
+         * the divisions below would divide by zero; bail out instead.
+         * Nothing has been mapped yet, so there is nothing to release. */
+        return;
+
+    }
+    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
+
+    if (!curbe_cmd)
+        return;
+
+    /* Size of the downscaled picture in macroblocks for this HME level. */
+    downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
+    downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
+
+    /* Start from the gen8 ME init data, then patch per-frame fields. */
+    memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data));
+
+    curbe_cmd->dw3.sub_pel_mode = 3;
+    if (avc_state->field_scaling_output_interleaved) {
+        /*frame set to zero,field specified*/
+        curbe_cmd->dw3.src_access = 0;
+        curbe_cmd->dw3.ref_access = 0;
+        curbe_cmd->dw7.src_field_polarity = 0;
+    }
+    curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
+    curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
+    curbe_cmd->dw5.qp_prime_y = qp_prime_y;
+
+    curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
+    curbe_cmd->dw6.write_distortions = write_distortions;
+    curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
+    curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
+
+    /* B frames use the B ME method and the second search-path table. */
+    if (generic_state->frame_type == SLICE_TYPE_B) {
+        curbe_cmd->dw1.bi_weight = 32;
+        curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
+        me_method = gen9_avc_b_me_method[generic_state->preset];
+        seach_table_idx = 1;
+    }
 
-    gpe->setup_interface_data(ctx, gpe_context);
+    if (generic_state->frame_type == SLICE_TYPE_P ||
+        generic_state->frame_type == SLICE_TYPE_B)
+        curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
 
-    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
-    /* the scaling is based on 8x8 blk level */
-    kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
-    kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
-    kernel_walker_param.no_dependency = 1;
+    curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
+    curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
 
-    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
+    /* Copy 14 dwords of the selected search path starting at dw16. */
+    memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
 
-    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
-                                            gpe_context,
-                                            media_function,
-                                            &media_object_walker_param);
+    /* Surface indices used by the gen8 ME kernel. */
+    curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM;
+    curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ;
+    curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM;
+    curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM;
+    curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM;
+    curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM;
+    curbe_cmd->dw38.reserved = 0;
 
-    return VA_STATUS_SUCCESS;
+    i965_gpe_context_unmap_curbe(gpe_context);
+    return;
 }
 
-
-/*
-sfd related function
-*/
 static void
-gen9_avc_set_curbe_sfd(VADriverContextP ctx,
-                       struct encode_state *encode_state,
-                       struct i965_gpe_context *gpe_context,
-                       struct intel_encoder_context *encoder_context,
-                       void * param)
+gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
+                                    struct encode_state *encode_state,
+                                    struct i965_gpe_context *gpe_context,
+                                    struct intel_encoder_context *encoder_context,
+                                    void * param)
 {
-    gen9_avc_sfd_curbe_data *cmd;
+    gen8_avc_frame_brc_update_curbe_data *cmd;
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
-    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
+    struct object_surface *obj_surface;
+    struct gen9_surface_avc *avc_priv_surface;
+    struct avc_param common_param;
 
-    cmd = i965_gpe_context_map_curbe(gpe_context);
+    obj_surface = encode_state->reconstructed_object;
 
-    if (!cmd)
+    if (!obj_surface || !obj_surface->private_data)
         return;
-    memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
-
-    cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
-    cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
-    cmd->dw0.stream_in_type = 7 ;
-    cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
-    cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
-    cmd->dw0.vdenc_mode_disable = 1 ;
-
-    cmd->dw1.hme_stream_in_ref_cost = 5 ;
-    cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
-    cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
+    avc_priv_surface = obj_surface->private_data;
 
-    cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
-    cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
+    cmd = i965_gpe_context_map_curbe(gpe_context);
 
-    cmd->dw3.large_mv_threshold = 128 ;
-    cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
-    cmd->dw5.zmv_threshold = 4 ;
-    cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
-    cmd->dw7.min_dist_threshold = 10 ;
+    if (!cmd)
+        return;
 
-    if (generic_state->frame_type == SLICE_TYPE_P) {
-        memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
+    memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data));
 
-    } else if (generic_state->frame_type == SLICE_TYPE_B) {
-        memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
+    cmd->dw5.target_size_flag = 0 ;
+    if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
+        /*overflow*/
+        generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
+        cmd->dw5.target_size_flag = 1 ;
     }
 
-    cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
-    cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
-    cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
-    cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
-    cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
-    cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
-    cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
-
-    i965_gpe_context_unmap_curbe(gpe_context);
-
-}
-
-static void
-gen9_avc_send_surface_sfd(VADriverContextP ctx,
-                          struct encode_state *encode_state,
-                          struct i965_gpe_context *gpe_context,
-                          struct intel_encoder_context *encoder_context,
-                          void * param)
-{
-    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
-    struct i965_gpe_resource *gpe_resource;
-    int size = 0;
-
-    /*HME mv data surface memv output 4x*/
-    gpe_resource = &avc_ctx->s4x_memv_data_buffer;
-    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
-                                   gpe_resource,
-                                   1,
-                                   I965_SURFACEFORMAT_R8_UNORM,
-                                   GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
-
-    /* memv distortion */
-    gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
-    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
-                                   gpe_resource,
-                                   1,
-                                   I965_SURFACEFORMAT_R8_UNORM,
-                                   GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
-    /*buffer output*/
-    size = 32 * 4 * 4;
-    gpe_resource = &avc_ctx->res_sfd_output_buffer;
-    i965_add_buffer_gpe_surface(ctx,
-                                gpe_context,
-                                gpe_resource,
-                                0,
-                                size / 4,
-                                0,
-                                GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
+    if (generic_state->skip_frame_enbale) {
+        cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
+        cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
 
-}
+        generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
 
-static VAStatus
-gen9_avc_kernel_sfd(VADriverContextP ctx,
-                    struct encode_state *encode_state,
-                    struct intel_encoder_context *encoder_context)
-{
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_gpe_table *gpe = &i965->gpe_table;
-    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
-    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
+    }
+    cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
+    cmd->dw1.frame_number = generic_state->seq_frame_number ;
+    cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
+    cmd->dw5.cur_frame_type = generic_state->frame_type ;
+    cmd->dw5.brc_flag = 0 ;
+    cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
 
-    struct i965_gpe_context *gpe_context;
-    struct gpe_media_object_parameter media_object_param;
-    struct gpe_media_object_inline_data media_object_inline_data;
-    int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
-    gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
+    if (avc_state->multi_pre_enable) {
+        cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
+        cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
+    }
 
-    gpe->context_init(ctx, gpe_context);
-    gpe->reset_binding_table(ctx, gpe_context);
+    cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
+    if (avc_state->min_max_qp_enable) {
+        switch (generic_state->frame_type) {
+        case SLICE_TYPE_I:
+            cmd->dw6.minimum_qp = avc_state->min_qp_i ;
+            cmd->dw6.maximum_qp = avc_state->max_qp_i ;
+            break;
+        case SLICE_TYPE_P:
+            cmd->dw6.minimum_qp = avc_state->min_qp_p ;
+            cmd->dw6.maximum_qp = avc_state->max_qp_p ;
+            break;
+        case SLICE_TYPE_B:
+            cmd->dw6.minimum_qp = avc_state->min_qp_b ;
+            cmd->dw6.maximum_qp = avc_state->max_qp_b ;
+            break;
+        }
+    } else {
+        cmd->dw6.minimum_qp = 0 ;
+        cmd->dw6.maximum_qp = 0 ;
+    }
 
-    /*set curbe*/
-    generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
+    generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
 
-    /*send surface*/
-    generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
+    if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
+        cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
+        cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
+        cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
+        cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
+        cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
+        cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
+        cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
+        cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
+        cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
+        cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
 
-    gpe->setup_interface_data(ctx, gpe_context);
+    }
 
-    memset(&media_object_param, 0, sizeof(media_object_param));
-    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
-    media_object_param.pinline_data = &media_object_inline_data;
-    media_object_param.inline_size = sizeof(media_object_inline_data);
+    memset(&common_param, 0, sizeof(common_param));
+    common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
+    common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
+    common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
+    common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
+    common_param.frames_per_100s = generic_state->frames_per_100s;
+    common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
+    common_param.target_bit_rate = generic_state->target_bit_rate;
 
-    gen9_avc_run_kernel_media_object(ctx, encoder_context,
-                                     gpe_context,
-                                     media_function,
-                                     &media_object_param);
+    i965_gpe_context_unmap_curbe(gpe_context);
 
-    return VA_STATUS_SUCCESS;
+    return;
 }
 
 /*
@@ -5657,6 +6616,9 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx,
                IS_GLK(i965->intel.device_info)) {
         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
+    } else if (IS_GEN8(i965->intel.device_info)) {
+        kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data);
+        kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data);
     }
 
     /* 4x scaling kernel*/
@@ -5722,7 +6684,11 @@ gen9_avc_kernel_init_me(VADriverContextP ctx,
     struct i965_kernel common_kernel;
     int i = 0;
 
-    kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
+    if (IS_GEN8(i965->intel.device_info)) {
+        kernel_param.curbe_size = sizeof(gen8_avc_me_curbe_data);
+    } else {
+        kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
+    }
     kernel_param.inline_data_size = 0;
     kernel_param.sampler_size = 0;
 
@@ -5782,6 +6748,9 @@ gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
                IS_GLK(i965->intel.device_info)) {
         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
+    } else if (IS_GEN8(i965->intel.device_info)) {
+        curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
+        num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
     }
 
     assert(curbe_size > 0);
@@ -5827,9 +6796,23 @@ gen9_avc_kernel_init_brc(VADriverContextP ctx,
     struct encoder_kernel_parameter kernel_param ;
     struct encoder_scoreboard_parameter scoreboard_param;
     struct i965_kernel common_kernel;
+    int num_brc_init_kernels = 0;
     int i = 0;
 
-    const int brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
+    if (IS_GEN8(i965->intel.device_info)) {
+        num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1;
+    } else {
+        num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC;
+    }
+
+    const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = {
+        (sizeof(gen9_avc_brc_init_reset_curbe_data)),
+        (sizeof(gen8_avc_frame_brc_update_curbe_data)),
+        (sizeof(gen9_avc_brc_init_reset_curbe_data)),
+        (sizeof(gen8_avc_mbenc_curbe_data)),
+        0,
+    };
+    const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
@@ -5847,8 +6830,12 @@ gen9_avc_kernel_init_brc(VADriverContextP ctx,
     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
     scoreboard_param.walkpat_flag = 0;
 
-    for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++) {
-        kernel_param.curbe_size = brc_curbe_size[i];
+    for (i = 0; i < num_brc_init_kernels; i++) {
+        if (IS_GEN8(i965->intel.device_info)) {
+            kernel_param.curbe_size = gen8_brc_curbe_size[i];
+        } else {
+            kernel_param.curbe_size = gen9_brc_curbe_size[i];
+        }
         gpe_context = &kernel_context->gpe_contexts[i];
         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
@@ -6240,9 +7227,10 @@ gen9_avc_encode_check_parameter(VADriverContextP ctx,
          rate_control_mode == VA_RC_CQP)) {
         generic_state->brc_need_reset = 0;// not support by CQP
     }
-
-    if (generic_state->brc_need_reset && !avc_state->sfd_mb_enable) {
+    if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) {
         avc_state->sfd_enable = 0;
+    } else {
+        avc_state->sfd_enable = 1;
     }
 
     if (generic_state->frames_per_window_size == 0) {
@@ -6271,6 +7259,7 @@ gen9_avc_encode_check_parameter(VADriverContextP ctx,
     if (avc_state->caf_supported) {
         switch (generic_state->frame_type) {
         case SLICE_TYPE_I:
+            avc_state->caf_enable = 0;
             break;
         case SLICE_TYPE_P:
             avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
@@ -6755,6 +7744,40 @@ gen9_avc_vme_context_destroy(void * context)
 }
 
 static void
+gen8_avc_kernel_init(VADriverContextP ctx,
+                     struct intel_encoder_context *encoder_context)
+{   /* Initialise the AVC encoder kernel contexts and install the curbe/surface callbacks used on Gen8. */
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; /* NOTE(review): dereferenced below, so encoder_context->vme_context must already be set when this is called -- confirm at the call site */
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
+    int fei_enabled = encoder_context->fei_enabled;
+    /* Kernel header lookup: the FEI variant when fei_enabled is non-zero, the regular one otherwise. */
+    generic_ctx->get_kernel_header_and_size = fei_enabled ?
+                                              intel_avc_fei_get_kernel_header_and_size :
+                                              intel_avc_get_kernel_header_and_size ;
+    gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling); /* kernel contexts (scaling, BRC, ME, MBEnc, SFD) all go through the gen9 initialisers */
+    gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
+    gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
+    gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
+    gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
+
+    // Curbe setters: gen8-specific for 4x scaling, ME, MBEnc and BRC frame update; gen9 functions for 2x scaling, BRC init/reset and SFD.
+    generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
+    generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x;
+    generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me;
+    generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc;
+    generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
+    generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update;
+    generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
+    /* Surface senders: every callback assigned here is the gen9 implementation. */
+    generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
+    generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
+    generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
+    generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
+    generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
+    generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
+}
+static void
 gen9_avc_kernel_init(VADriverContextP ctx,
                      struct intel_encoder_context *encoder_context)
 {
@@ -8523,7 +9546,6 @@ gen9_avc_encode_picture(VADriverContextP ctx,
     else
         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
     intel_batchbuffer_emit_mi_flush(batch);
-
     for (generic_state->curr_pak_pass = 0;
          generic_state->curr_pak_pass < generic_state->num_pak_passes;
          generic_state->curr_pak_pass++) {
@@ -8542,7 +9564,6 @@ gen9_avc_encode_picture(VADriverContextP ctx,
         gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
         gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
         gen9_avc_read_mfc_status(ctx, encoder_context);
-
     }
 
     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
@@ -8684,6 +9705,9 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
             generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
             generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
         }
+    } else if (IS_GEN8(i965->intel.device_info)) {
+        generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
+        generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
     } else if (IS_KBL(i965->intel.device_info) ||
                IS_GLK(i965->intel.device_info)) {
         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
@@ -8876,9 +9900,12 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
 
     avc_state->lambda_table_enable = 0;
 
-
-    if (IS_SKL(i965->intel.device_info) ||
-        IS_BXT(i965->intel.device_info)) {
+    if (IS_GEN8(i965->intel.device_info)) {
+        avc_state->brc_const_data_surface_width = 64;
+        avc_state->brc_const_data_surface_height = 44;
+        avc_state->mb_status_supported = 0;
+    } else if (IS_SKL(i965->intel.device_info) ||
+               IS_BXT(i965->intel.device_info)) {
         avc_state->brc_const_data_surface_width = 64;
         avc_state->brc_const_data_surface_height = 44;
         avc_state->brc_split_enable = 1;
@@ -8926,7 +9953,11 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
 
-    gen9_avc_kernel_init(ctx, encoder_context);
+    if (IS_GEN8(i965->intel.device_info)) {
+        gen8_avc_kernel_init(ctx, encoder_context);
+    } else {
+        gen9_avc_kernel_init(ctx, encoder_context);
+    }
     encoder_context->vme_context = vme_context;
     encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;