OSDN Git Service

Stats/PreEnc: Add GEN9 AVC PreEnc support
authorSreerenj Balachandran <sreerenj.balachandran@intel.com>
Fri, 1 Dec 2017 22:47:11 +0000 (14:47 -0800)
committerXiang, Haihao <haihao.xiang@intel.com>
Wed, 10 Jan 2018 06:34:35 +0000 (14:34 +0800)
The AVC PreEnc includes three stage processing:

1: It does the dowscaling of source and reference pictures.
Also generate the variance and pixel average of current frame,
past and future ref based on user request.

2: Second stage involves the Hierarchical Motion Estimation
Only the 4x hme is supported in PreEnc.

3:Third stage is a PreProc which can generate the Motion Vectors and
Distortion values

libva PR: https://github.com/01org/libva/pull/110

Signed-off-by: Sreerenj Balachandran <sreerenj.balachandran@intel.com>
src/i965_avc_encoder.c
src/i965_avc_encoder.h
src/i965_avc_encoder_common.h

index e4ac008..a334e0a 100644 (file)
 #define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
 #define FEI_AVC_DISTORTION_BUFFER_SIZE   48
 #define FEI_AVC_QP_BUFFER_SIZE           1
+#define PREENC_AVC_STATISTICS_BUFFER_SIZE 64
+
+#define SCALE_CUR_PIC        1
+#define SCALE_PAST_REF_PIC   2
+#define SCALE_FUTURE_REF_PIC 3
 
 static const uint32_t qm_flat[16] = {
     0x10101010, 0x10101010, 0x10101010, 0x10101010,
@@ -692,15 +697,17 @@ intel_avc_fei_get_kernel_header_and_size(
 
     bin_start = (char *)pvbinary;
     pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
-    pinvalid_entry = &(pkh_table->ply_2xdscale_2f_ply_2f) + 1;
+    pinvalid_entry = &(pkh_table->wp) + 1;
     next_krnoffset = binary_size;
 
     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
-        pcurr_header = &pkh_table->ply_2xdscale_ply;
+        pcurr_header = &pkh_table->ply_dscale_ply;
     } else if (operation == INTEL_GENERIC_ENC_ME) {
         pcurr_header = &pkh_table->me_p;
     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
         pcurr_header = &pkh_table->mbenc_i;
+    } else if (operation == INTEL_GENERIC_ENC_PREPROC) {
+        pcurr_header =  &pkh_table->preproc;
     } else {
         return false;
     }
@@ -1359,6 +1366,26 @@ gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
         }
     }
 
+    /* free preenc resources */
+    i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
+    i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
+    i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
+    i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
+
+    i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
+    i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
+
+    i965_DestroySurfaces(ctx, &avc_ctx->preenc_scaled_4x_surface_id, 1);
+    avc_ctx->preenc_scaled_4x_surface_id = VA_INVALID_SURFACE;
+    avc_ctx->preenc_scaled_4x_surface_obj = NULL;
+
+    i965_DestroySurfaces(ctx, &avc_ctx->preenc_past_ref_scaled_4x_surface_id, 1);
+    avc_ctx->preenc_past_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
+    avc_ctx->preenc_past_ref_scaled_4x_surface_obj = NULL;
+
+    i965_DestroySurfaces(ctx, &avc_ctx->preenc_future_ref_scaled_4x_surface_id, 1);
+    avc_ctx->preenc_future_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
+    avc_ctx->preenc_future_ref_scaled_4x_surface_obj = NULL;
 }
 
 static void
@@ -5967,196 +5994,974 @@ gen9_avc_kernel_sfd(VADriverContextP ctx,
     return VA_STATUS_SUCCESS;
 }
 
-static void
-gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
-                         struct encode_state *encode_state,
-                         struct i965_gpe_context *gpe_context,
-                         struct intel_encoder_context *encoder_context,
-                         void * param)
+/**************** PreEnc Scaling *************************************/
+/* function to run preenc scaling: gen9_avc_preenc_kernel_scaling()
+ * function to set preenc scaling curbe is the same one using for avc encode
+        == gen95_avc_set_curbe_scaling4x()
+ * function to send buffer/surface resources is the same one using for avc encode
+        == gen9_avc_send_surface_scaling()
+ */
+static VAStatus
+gen9_avc_preenc_kernel_scaling(VADriverContextP ctx,
+                               struct encode_state *encode_state,
+                               struct intel_encoder_context *encoder_context,
+                               int hme_type,
+                               int scale_surface_type)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    gen8_avc_mbenc_curbe_data *cmd;
+    struct i965_gpe_table *gpe = &i965->gpe_table;
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
+    VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
+    VAStatsStatisticsParameter *stat_param = NULL;
+    struct i965_gpe_context *gpe_context;
+    struct scaling_param surface_param;
+    struct object_surface *obj_surface = NULL;
+    struct gpe_media_object_walker_parameter media_object_walker_param;
+    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
+    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
+    int media_function = 0;
+    int kernel_idx = 0;
+    int enable_statistics_output;
 
-    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
-    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
-    VASurfaceID surface_id;
-    struct object_surface *obj_surface;
+    stat_param_h264 = avc_state->stat_param;
+    assert(stat_param_h264);
+    stat_param = &stat_param_h264->stats_params;
+    enable_statistics_output = !stat_param_h264->disable_statistics_output;
 
-    struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
-    unsigned char qp = 0;
-    unsigned char me_method = 0;
-    unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
-    unsigned int table_idx = 0;
-    unsigned int curbe_size = 0;
+    memset(&surface_param, 0, sizeof(struct scaling_param));
+    media_function = INTEL_MEDIA_STATE_4X_SCALING;
+    kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
+    downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
+    downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
+
+    surface_param.input_frame_width = generic_state->frame_width_in_pixel;
+    surface_param.input_frame_height = generic_state->frame_height_in_pixel;
+    surface_param.output_frame_width = generic_state->frame_width_4x;
+    surface_param.output_frame_height = generic_state->frame_height_4x;
+    surface_param.use_4x_scaling  = 1 ;
+    surface_param.use_16x_scaling = 0 ;
+    surface_param.use_32x_scaling = 0 ;
+    surface_param.enable_mb_flatness_check = enable_statistics_output;
+    surface_param.enable_mb_variance_output = enable_statistics_output;
+    surface_param.enable_mb_pixel_average_output = enable_statistics_output;
+    surface_param.blk8x8_stat_enabled = stat_param_h264->enable_8x8_statistics;
+
+    switch (scale_surface_type) {
+
+    case  SCALE_CUR_PIC:
+        surface_param.input_surface = encode_state->input_yuv_object ;
+        surface_param.output_surface = avc_ctx->preenc_scaled_4x_surface_obj ;
 
-    unsigned int preset = generic_state->preset;
-    if (IS_GEN8(i965->intel.device_info)) {
-        cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
-        if (!cmd)
-            return;
-        curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
-        memset(cmd, 0, curbe_size);
+        if (enable_statistics_output) {
+            surface_param.pres_mbv_proc_stat_buffer =
+                &avc_ctx->preproc_stat_data_out_buffer;
+            surface_param.mbv_proc_stat_enabled = 1;
+        } else {
+            surface_param.mbv_proc_stat_enabled = 0;
+            surface_param.pres_mbv_proc_stat_buffer = NULL;
+        }
+        break;
 
-        if (mbenc_i_frame_dist_in_use) {
-            memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
+    case SCALE_PAST_REF_PIC:
+        obj_surface = SURFACE(stat_param->past_references[0].picture_id);
+        assert(obj_surface);
+        surface_param.input_surface = obj_surface;
+        surface_param.output_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
+
+        if (stat_param->past_ref_stat_buf) {
+            surface_param.pres_mbv_proc_stat_buffer =
+                &avc_ctx->preenc_past_ref_stat_data_out_buffer;
+            surface_param.mbv_proc_stat_enabled = 1;
         } else {
-            switch (generic_state->frame_type) {
-            case SLICE_TYPE_I:
-                memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
-                break;
-            case SLICE_TYPE_P:
-                memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
-                break;
-            case SLICE_TYPE_B:
-                memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
-                break;
-            default:
-                assert(0);
-            }
+            surface_param.mbv_proc_stat_enabled = 0;
+            surface_param.pres_mbv_proc_stat_buffer = NULL;
         }
-    } else {
-        assert(0);
+        break;
 
-        return;
+    case SCALE_FUTURE_REF_PIC:
+
+        obj_surface = SURFACE(stat_param->future_references[0].picture_id);
+        assert(obj_surface);
+        surface_param.input_surface = obj_surface;
+        surface_param.output_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
+
+        if (stat_param->future_ref_stat_buf) {
+            surface_param.pres_mbv_proc_stat_buffer =
+                &avc_ctx->preenc_future_ref_stat_data_out_buffer;
+            surface_param.mbv_proc_stat_enabled = 1;
+        } else {
+            surface_param.mbv_proc_stat_enabled = 0;
+            surface_param.pres_mbv_proc_stat_buffer = NULL;
+        }
+        break;
+    default :
+        assert(0);
     }
 
-    me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
-    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+    gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
 
-    cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
-    cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
-    cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
-    cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
+    gpe->context_init(ctx, gpe_context);
+    gpe->reset_binding_table(ctx, gpe_context);
 
-    cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
-    cmd->dw38.max_len_sp = 0;
+    generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
 
-    cmd->dw3.src_access = 0;
-    cmd->dw3.ref_access = 0;
+    surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
+    surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
 
-    if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
-        //disable ftq_override by now.
-        if (avc_state->ftq_override) {
-            cmd->dw3.ftq_enable = avc_state->ftq_enable;
+    /* No need of explicit flatness_check surface allocation. The field mb_is_flat
+     * VAStatsStatisticsH264 will be used to store the output.  */
+    surface_param.enable_mb_flatness_check = 0;
+    generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
 
-        } else {
-            if (generic_state->frame_type == SLICE_TYPE_P) {
-                cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
+    /* setup the interface data */
+    gpe->setup_interface_data(ctx, gpe_context);
 
-            } else {
-                cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
-            }
-        }
-    } else {
-        cmd->dw3.ftq_enable = 0;
-    }
+    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
+    /* the scaling is based on 8x8 blk level */
+    kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
+    kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
+    kernel_walker_param.no_dependency = 1;
 
-    if (avc_state->disable_sub_mb_partion)
-        cmd->dw3.sub_mb_part_mask = 0x7;
+    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
 
-    if (mbenc_i_frame_dist_in_use) {
-        cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
-        cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
-        cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
-        cmd->dw6.batch_buffer_end = 0;
-        cmd->dw31.intra_compute_type = 1;
-    } else {
-        cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
-        cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
-        cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
+    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
+                                            gpe_context,
+                                            media_function,
+                                            &media_object_walker_param);
 
-        {
-            memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
-            if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
-            } else if (avc_state->skip_bias_adjustment_enable) {
-                /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
-                // No need to check for P picture as the flag is only enabled for P picture */
-                cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
-            }
-        }
-        table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
-        memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
-    }
-    cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
-    cmd->dw4.field_parity_flag = 0;//bottom field
-    cmd->dw4.enable_cur_fld_idr = 0;//field realted
-    cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
-    cmd->dw4.hme_enable = generic_state->hme_enabled;
-    cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
-    cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
+    return VA_STATUS_SUCCESS;
+}
 
-    cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
-    cmd->dw7.src_field_polarity = 0;//field related
+/**************** PreEnc HME *************************************/
+/* function to run preenc hme is the same one we using in avc encode:
+         ==  gen9_avc_kernel_me()
+ * function to set preenc hme curbe: gen9_avc_preenc_set_curbe_me()
+ * function to send hme buffer/surface: gen9_avc_preenc_send_surface_me()
+ */
+static void
+gen9_avc_preenc_set_curbe_me(VADriverContextP ctx,
+                             struct encode_state *encode_state,
+                             struct i965_gpe_context *gpe_context,
+                             struct intel_encoder_context *encoder_context,
+                             void * param)
+{
+    gen9_avc_fei_me_curbe_data *curbe_cmd;
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
+    VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
 
-    /*ftq_skip_threshold_lut set,dw14 /15*/
+    struct me_param * curbe_param = (struct me_param *)param ;
+    unsigned char  use_mv_from_prev_step = 0;
+    unsigned char write_distortions = 0;
+    unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
+    unsigned char seach_table_idx = 0;
+    unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
+    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
+    unsigned int scale_factor = 0;
 
-    /*r5 disable NonFTQSkipThresholdLUT*/
-    if (generic_state->frame_type == SLICE_TYPE_P) {
-        cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
-    } else if (generic_state->frame_type == SLICE_TYPE_B) {
-        cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
+    switch (curbe_param->hme_type) {
+    case INTEL_ENC_HME_4x:
+        use_mv_from_prev_step = 0;
+        write_distortions = 0;
+        mv_shift_factor = 2;
+        scale_factor = 4;
+        prev_mv_read_pos_factor = 0;
+        break;
+
+    default:
+        assert(0);
     }
 
-    cmd->dw13.qp_prime_y = qp;
-    cmd->dw13.qp_prime_cb = qp;
-    cmd->dw13.qp_prime_cr = qp;
-    cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
+    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
+    if (!curbe_cmd)
+        return;
 
-    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
-        switch (gen9_avc_multi_pred[preset]) {
-        case 0:
-            cmd->dw32.mult_pred_l0_disable = 128;
-            cmd->dw32.mult_pred_l1_disable = 128;
-            break;
-        case 1:
-            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
-            cmd->dw32.mult_pred_l1_disable = 128;
-            break;
-        case 2:
-            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
-            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
-            break;
-        case 3:
-            cmd->dw32.mult_pred_l0_disable = 1;
-            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
-            break;
-        }
+    downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
+    downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
 
-    } else {
-        cmd->dw32.mult_pred_l0_disable = 128;
-        cmd->dw32.mult_pred_l1_disable = 128;
+    memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_init_data));
+
+    curbe_cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
+    if (avc_state->field_scaling_output_interleaved) {
+        /*frame set to zero,field specified*/
+        curbe_cmd->dw3.src_access = 0;
+        curbe_cmd->dw3.ref_access = 0;
+        curbe_cmd->dw7.src_field_polarity = 0;
     }
+    curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
+    curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
+    curbe_cmd->dw5.qp_prime_y = stat_param_h264->frame_qp;
+
+    curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
+    curbe_cmd->dw6.write_distortions = write_distortions;
+    curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
+    curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;//frame only
 
     if (generic_state->frame_type == SLICE_TYPE_B) {
-        cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
-        cmd->dw34.list1_ref_id0_frm_field_parity = 0;
-        cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
+        curbe_cmd->dw1.bi_weight = 32;
+        curbe_cmd->dw13.num_ref_idx_l1_minus1 = stat_param->num_future_references - 1;
+        me_method = gen9_avc_b_me_method[generic_state->preset];
+        seach_table_idx = 1;
     }
 
-    cmd->dw34.b_original_bff = 0; //frame only
-    cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
-    cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
-    cmd->dw34.mad_enable_falg = avc_state->mad_enable;
-    cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
-    cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
-    cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
+    if (generic_state->frame_type == SLICE_TYPE_P ||
+        generic_state->frame_type == SLICE_TYPE_B)
+        curbe_cmd->dw13.num_ref_idx_l0_minus1 = stat_param->num_past_references - 1;
 
-    if (cmd->dw34.force_non_skip_check) {
-        cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
-    }
+    curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
+    curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
 
-    cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
-    cmd->dw38.ref_threshold = 400;
-    cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
-    cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;
+    memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
 
-    if (mbenc_i_frame_dist_in_use) {
-        cmd->dw13.qp_prime_y = 0;
-        cmd->dw13.qp_prime_cb = 0;
-        cmd->dw13.qp_prime_cr = 0;
+    curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
+    curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
+    curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
+    curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
+    curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
+    curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
+    curbe_cmd->dw38.reserved = 0;
+
+    i965_gpe_context_unmap_curbe(gpe_context);
+    return;
+}
+
+static void
+gen9_avc_preenc_send_surface_me(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct i965_gpe_context *gpe_context,
+                                struct intel_encoder_context *encoder_context,
+                                void * param)
+{
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
+    VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
+    struct object_surface *input_surface;
+    struct i965_gpe_resource *gpe_resource;
+    struct me_param * curbe_param = (struct me_param *)param ;
+    int i = 0;
+
+    /* PreEnc Only supports 4xme */
+    assert(curbe_param->hme_type == INTEL_ENC_HME_4x);
+
+    switch (curbe_param->hme_type) {
+    case INTEL_ENC_HME_4x : {
+        /*memv output 4x*/
+        gpe_resource = &avc_ctx->s4x_memv_data_buffer;
+        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
+
+        /* memv distortion output*/
+        gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
+        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
+
+        /* brc distortion  output*/
+        gpe_resource = &avc_ctx->res_brc_dist_data_surface;
+        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_ME_BRC_DISTORTION_INDEX);
+
+        /* input past ref scaled YUV surface*/
+        for (i = 0; i < stat_param->num_past_references; i++) {
+            /*input current down scaled YUV surface for forward refef */
+            input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
+            i965_add_adv_gpe_surface(ctx, gpe_context,
+                                     input_surface,
+                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
+
+            input_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
+            i965_add_adv_gpe_surface(ctx, gpe_context,
+                                     input_surface,
+                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
+        }
+
+        /* input future ref scaled YUV surface*/
+        for (i = 0; i < stat_param->num_future_references; i++) {
+            /*input current down scaled YUV surface for backward ref */
+            input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
+            i965_add_adv_gpe_surface(ctx, gpe_context,
+                                     input_surface,
+                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
+
+            input_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
+            i965_add_adv_gpe_surface(ctx, gpe_context,
+                                     input_surface,
+                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
+        }
+        break;
+
+    }
+    default:
+        break;
+
+    }
+}
+
+/**************** PreEnc PreProc *************************************/
+/* function to run preenc preproc: gen9_avc_preenc_kernel_preproc()
+ * function to set preenc preproc curbe: gen9_avc_preenc_set_curbe_preproc()
+ * function to send preproc buffer/surface: gen9_avc_preenc_send_surface_preproc ()
+ */
+static void
+gen9_avc_preenc_set_curbe_preproc(VADriverContextP ctx,
+                                  struct encode_state *encode_state,
+                                  struct i965_gpe_context *gpe_context,
+                                  struct intel_encoder_context *encoder_context,
+                                  void * param)
+{
+    gen9_avc_preproc_curbe_data *cmd;
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
+    VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
+    unsigned char me_method = 0;
+    unsigned int table_idx = 0;
+    int ref_width, ref_height, len_sp;
+    int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
+    int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
+    unsigned int preset = generic_state->preset;
+
+    cmd = (gen9_avc_preproc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
+    if (!cmd)
+        return;
+    memset(cmd, 0, sizeof(gen9_avc_preproc_curbe_data));
+
+    switch (generic_state->frame_type) {
+    case SLICE_TYPE_I:
+        memcpy(cmd, gen9_avc_preenc_preproc_curbe_i_frame_init_data,
+               sizeof(gen9_avc_preproc_curbe_data));
+        break;
+    case SLICE_TYPE_P:
+        memcpy(cmd, gen9_avc_preenc_preproc_curbe_p_frame_init_data,
+               sizeof(gen9_avc_preproc_curbe_data));
+        break;
+    case SLICE_TYPE_B:
+        memcpy(cmd, gen9_avc_preenc_preproc_curbe_b_frame_init_data,
+               sizeof(gen9_avc_preproc_curbe_data));
+        break;
+    default:
+        assert(0);
+    }
+    /* 4 means full search, 6 means diamand search */
+    me_method  = (stat_param_h264->search_window == 5) ||
+                 (stat_param_h264->search_window == 8) ? 4 : 6;
+
+    ref_width    = stat_param_h264->ref_width;
+    ref_height   = stat_param_h264->ref_height;
+    len_sp       = stat_param_h264->len_sp;
+    /* If there is a serch_window, discard user provided ref_width, ref_height
+     * and search_path length */
+    switch (stat_param_h264->search_window) {
+    case 0:
+        /*  not use predefined search window, there should be a search_path input */
+        if ((stat_param_h264->search_path != 0) &&
+            (stat_param_h264->search_path != 1) &&
+            (stat_param_h264->search_path != 2)) {
+            WARN_ONCE("Invalid input search_path for SearchWindow=0  \n");
+            assert(0);
+        }
+        /* 4 means full search, 6 means diamand search */
+        me_method = (stat_param_h264->search_path == 1) ? 6 : 4;
+        if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
+            WARN_ONCE("Invalid input ref_width/ref_height in"
+                      "SearchWindow=0 case! \n");
+            assert(0);
+        }
+        break;
+
+    case 1:
+        /* Tiny - 4 SUs 24x24 window */
+        ref_width  = 24;
+        ref_height = 24;
+        len_sp     = 4;
+        break;
+
+    case 2:
+        /* Small - 9 SUs 28x28 window */
+        ref_width  = 28;
+        ref_height = 28;
+        len_sp     = 9;
+        break;
+    case 3:
+        /* Diamond - 16 SUs 48x40 window */
+        ref_width  = 48;
+        ref_height = 40;
+        len_sp     = 16;
+        break;
+    case 4:
+        /* Large Diamond - 32 SUs 48x40 window */
+        ref_width  = 48;
+        ref_height = 40;
+        len_sp     = 32;
+        break;
+    case 5:
+        /* Exhaustive - 48 SUs 48x40 window */
+        ref_width  = 48;
+        ref_height = 40;
+        len_sp     = 48;
+        break;
+    case 6:
+        /* Diamond - 16 SUs 64x32 window */
+        ref_width  = 64;
+        ref_height = 32;
+        len_sp     = 16;
+        break;
+    case 7:
+        /* Large Diamond - 32 SUs 64x32 window */
+        ref_width  = 64;
+        ref_height = 32;
+        len_sp     = 32;
+        break;
+    case 8:
+        /* Exhaustive - 48 SUs 64x32 window */
+        ref_width  = 64;
+        ref_height = 32;
+        len_sp     = 48;
+        break;
+
+    default:
+        assert(0);
+    }
+
+    /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
+    if (is_bframe) {
+        CLIP(ref_width, 4, 32);
+        CLIP(ref_height, 4, 32);
+    } else if (is_pframe) {
+        CLIP(ref_width, 4, 64);
+        CLIP(ref_height, 4, 32);
+    }
+
+    cmd->dw0.adaptive_enable =
+        cmd->dw37.adaptive_enable = stat_param_h264->adaptive_search;
+    cmd->dw2.max_len_sp = len_sp;
+    cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
+    cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
+    cmd->dw3.src_access =
+        cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
+
+    if (generic_state->frame_type != SLICE_TYPE_I && avc_state->ftq_enable)
+        cmd->dw3.ft_enable = stat_param_h264->ft_enable;
+    else
+        cmd->dw3.ft_enable = 0;
+
+    cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
+    cmd->dw6.pic_height = cmd->dw5.slice_mb_height = generic_state->frame_height_in_mbs;
+    cmd->dw3.sub_mb_part_mask = stat_param_h264->sub_mb_part_mask;
+    cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
+    cmd->dw3.inter_sad = stat_param_h264->inter_sad;
+    cmd->dw3.intra_sad = stat_param_h264->intra_sad;
+    cmd->dw4.hme_enable = generic_state->hme_enabled;
+    cmd->dw4.frame_qp = stat_param_h264->frame_qp;
+    cmd->dw4.per_mb_qp_enable = stat_param_h264->mb_qp;
+
+    cmd->dw4.multiple_mv_predictor_per_mb_enable =
+        (generic_state->frame_type != SLICE_TYPE_I) ? 0 : stat_param_h264->mv_predictor_ctrl;
+
+    cmd->dw4.disable_mv_output = (generic_state->frame_type == SLICE_TYPE_I) ? 1 : stat_param_h264->disable_mv_output;
+    cmd->dw4.disable_mb_stats = stat_param_h264->disable_statistics_output;
+
+    cmd->dw4.fwd_ref_pic_enable = (stat_param->num_past_references > 0) ? 1 : 0;
+    cmd->dw4.bwd_ref_pic_enable = (stat_param->num_future_references > 0) ? 1 : 0;
+
+    cmd->dw7.intra_part_mask = stat_param_h264->intra_part_mask;
+
+    /* mv mode cost */
+    memcpy(&(cmd->dw8), gen75_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][stat_param_h264->frame_qp], 8 * sizeof(unsigned int));
+
+    /* reset all except sic_fwd_trans_coeff_threshold_* from dw8 to dw15 */
+    memset(&(cmd->dw8), 0, 6 * (sizeof(unsigned int)));
+
+    /* search path tables */
+    table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
+    memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
+
+    if (stat_param_h264->intra_part_mask  == 0x07)
+        cmd->dw31.intra_compute_type  = 3;
+
+    cmd->dw38.ref_threshold = 400;
+    cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
+
+    if (generic_state->frame_type == SLICE_TYPE_I) {
+        cmd->dw0.skip_mode_enable = cmd->dw37.skip_mode_enable = 0;
+        cmd->dw36.hme_combine_overlap = 0;
+    } else if (generic_state->frame_type == SLICE_TYPE_P) {
+        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
+        cmd->dw3.bme_disable_fbr = 1;
+        cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
+        cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
+        cmd->dw7.non_skip_zmv_added = 1;
+        cmd->dw7.non_skip_mode_added = 1;
+        cmd->dw7.skip_center_mask = 1;
+        cmd->dw32.max_vmv_r =
+            i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
+        cmd->dw36.hme_combine_overlap = 1;
+
+    } else if (generic_state->frame_type == SLICE_TYPE_P) { /* B slice */
+
+        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
+        cmd->dw3.search_ctrl = 0;
+        cmd->dw3.skip_type = 1;
+        cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
+        cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
+        cmd->dw7.skip_center_mask = 0xff;
+        cmd->dw32.max_vmv_r =
+            i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
+        cmd->dw36.hme_combine_overlap = 1;
+    }
+
+    cmd->dw40.curr_pic_surf_index = GEN9_AVC_PREPROC_CURR_Y_INDEX;
+    cmd->dw41.hme_mv_dat_surf_index = GEN9_AVC_PREPROC_HME_MV_DATA_INDEX;
+    cmd->dw42.mv_predictor_surf_index = GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX;
+    cmd->dw43.mb_qp_surf_index = GEN9_AVC_PREPROC_MBQP_INDEX;
+    cmd->dw44.mv_data_out_surf_index = GEN9_AVC_PREPROC_MV_DATA_INDEX;
+    cmd->dw45.mb_stats_out_surf_index = GEN9_AVC_PREPROC_MB_STATS_INDEX;
+    cmd->dw46.vme_inter_prediction_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX;
+    cmd->dw47.vme_Inter_prediction_mr_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX;
+    cmd->dw48.ftq_lut_surf_index = GEN9_AVC_PREPROC_FTQ_LUT_INDEX;
+
+    i965_gpe_context_unmap_curbe(gpe_context);
+}
+
+static void
+gen9_avc_preenc_send_surface_preproc(VADriverContextP ctx,
+                                     struct encode_state *encode_state,
+                                     struct i965_gpe_context *gpe_context,
+                                     struct intel_encoder_context *encoder_context,
+                                     void * param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    struct object_surface *obj_surface;
+    struct i965_gpe_resource *gpe_resource;
+    VASurfaceID surface_id;
+    VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
+    VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
+    unsigned int size = 0, frame_mb_nums = 0;
+
+    frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
+
+    /* input yuv surface, Y index */
+    obj_surface = encode_state->input_yuv_object;
+    i965_add_2d_gpe_surface(ctx,
+                            gpe_context,
+                            obj_surface,
+                            0,
+                            1,
+                            I965_SURFACEFORMAT_R8_UNORM,
+                            GEN9_AVC_PREPROC_CURR_Y_INDEX);
+
+    /* input yuv surface, UV index */
+    i965_add_2d_gpe_surface(ctx,
+                            gpe_context,
+                            obj_surface,
+                            1,
+                            1,
+                            I965_SURFACEFORMAT_R16_UINT,
+                            GEN9_AVC_MBENC_CURR_UV_INDEX);
+
+
+    if (generic_state->hme_enabled) {
+        /* HME mv data buffer */
+        gpe_resource = &avc_ctx->s4x_memv_data_buffer;
+        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_PREPROC_HME_MV_DATA_INDEX);
+    }
+
+    /* mv predictor buffer */
+    if (stat_param_h264->mv_predictor_ctrl) {
+        size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
+        gpe_resource = &avc_ctx->preproc_mv_predictor_buffer;
+        i965_add_buffer_gpe_surface(ctx,
+                                    gpe_context,
+                                    gpe_resource,
+                                    0,
+                                    size / 4,
+                                    0,
+                                    GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX);
+    }
+
+    /* MB qp buffer */
+    if (stat_param_h264->mb_qp) {
+        size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
+        gpe_resource = &avc_ctx->preproc_mb_qp_buffer;
+        i965_add_buffer_gpe_surface(ctx,
+                                    gpe_context,
+                                    gpe_resource,
+                                    0,
+                                    size / 4,
+                                    0,
+                                    GEN9_AVC_PREPROC_MBQP_INDEX);
+
+        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
+        size = 16 * AVC_QP_MAX * 4;
+        i965_add_buffer_gpe_surface(ctx,
+                                    gpe_context,
+                                    gpe_resource,
+                                    0,
+                                    size / 4,
+                                    0,
+                                    GEN9_AVC_PREPROC_FTQ_LUT_INDEX);
+
+    }
+
+    /* mv data output buffer */
+    if (!stat_param_h264->disable_mv_output) {
+        gpe_resource = &avc_ctx->preproc_mv_data_out_buffer;
+        size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
+        i965_add_buffer_gpe_surface(ctx,
+                                    gpe_context,
+                                    gpe_resource,
+                                    0,
+                                    size / 4,
+                                    0,
+                                    GEN9_AVC_PREPROC_MV_DATA_INDEX);
+    }
+
+    /* statistics output buffer */
+    if (!stat_param_h264->disable_statistics_output) {
+        gpe_resource = &avc_ctx->preproc_stat_data_out_buffer;
+        size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
+        i965_add_buffer_gpe_surface(ctx,
+                                    gpe_context,
+                                    gpe_resource,
+                                    0,
+                                    size / 4,
+                                    0,
+                                    GEN9_AVC_PREPROC_MB_STATS_INDEX);
+    }
+
+    /* vme cur pic y */
+    obj_surface = encode_state->input_yuv_object;
+    i965_add_2d_gpe_surface(ctx,
+                            gpe_context,
+                            obj_surface,
+                            0,
+                            1,
+                            I965_SURFACEFORMAT_R8_UNORM,
+                            GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX);
+
+    /* vme cur pic y (repeating based on required BTI order for mediakerel)*/
+    obj_surface = encode_state->input_yuv_object;
+    i965_add_2d_gpe_surface(ctx,
+                            gpe_context,
+                            obj_surface,
+                            0,
+                            1,
+                            I965_SURFACEFORMAT_R8_UNORM,
+                            GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX);
+
+    /* vme forward ref */
+    /* Only supports one past ref */
+    if (stat_param->num_past_references > 0) {
+        surface_id = stat_param->past_references[0].picture_id;
+        assert(surface_id != VA_INVALID_ID);
+        obj_surface = SURFACE(surface_id);
+        if (!obj_surface)
+            return;
+        i965_add_adv_gpe_surface(ctx, gpe_context,
+                                 obj_surface,
+                                 GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX);
+
+    }
+
+    /* vme future ref */
+    /* Only supports one future ref */
+    if (stat_param->num_future_references > 0) {
+        surface_id = stat_param->future_references[0].picture_id;
+        assert(surface_id != VA_INVALID_ID);
+        obj_surface = SURFACE(surface_id);
+        if (!obj_surface)
+            return;
+        i965_add_adv_gpe_surface(ctx, gpe_context,
+                                 obj_surface,
+                                 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX);
+
+        surface_id = stat_param->future_references[0].picture_id;
+        assert(surface_id != VA_INVALID_ID);
+        obj_surface = SURFACE(surface_id);
+        if (!obj_surface)
+            return;
+        i965_add_adv_gpe_surface(ctx, gpe_context,
+                                 obj_surface,
+                                 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX);
+    }
+
+    return;
+
+}
+
+static VAStatus
+gen9_avc_preenc_kernel_preproc(VADriverContextP ctx,
+                               struct encode_state *encode_state,
+                               struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_gpe_table *gpe = &i965->gpe_table;
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
+    struct i965_gpe_context *gpe_context;
+    struct gpe_media_object_walker_parameter media_object_walker_param;
+    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
+    int media_function = INTEL_MEDIA_STATE_PREPROC;
+    struct i965_gpe_resource *gpe_resource = NULL;
+    unsigned int * data = NULL;
+    unsigned int ftq_lut_table_size = 16 * 52; /* 16 DW per QP for each qp*/
+
+    gpe_context = &(avc_ctx->context_preproc.gpe_contexts);
+    gpe->context_init(ctx, gpe_context);
+    gpe->reset_binding_table(ctx, gpe_context);
+
+    /*set curbe*/
+    generic_ctx->pfn_set_curbe_preproc(ctx, encode_state, gpe_context, encoder_context, NULL);
+
+    /*send surface*/
+    generic_ctx->pfn_send_preproc_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
+
+    gpe->setup_interface_data(ctx, gpe_context);
+
+    /*  Set up FtqLut Buffer if there is QP change within a frame */
+    if (stat_param_h264->mb_qp) {
+        gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
+        assert(gpe_resource);
+        data = i965_map_gpe_resource(gpe_resource);
+        assert(data);
+        memcpy(data, gen9_avc_preenc_preproc_ftq_lut, ftq_lut_table_size * sizeof(unsigned int));
+    }
+
+    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
+    kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs ;
+    kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs ;
+    kernel_walker_param.no_dependency = 1;
+
+    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
+
+    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
+                                            gpe_context,
+                                            media_function,
+                                            &media_object_walker_param);
+
+    return VA_STATUS_SUCCESS;
+}
+
+
+static void
+gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
+                         struct encode_state *encode_state,
+                         struct i965_gpe_context *gpe_context,
+                         struct intel_encoder_context *encoder_context,
+                         void * param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    gen8_avc_mbenc_curbe_data *cmd;
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+
+    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
+    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
+    VASurfaceID surface_id;
+    struct object_surface *obj_surface;
+
+    struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
+    unsigned char qp = 0;
+    unsigned char me_method = 0;
+    unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
+    unsigned int table_idx = 0;
+    unsigned int curbe_size = 0;
+
+    unsigned int preset = generic_state->preset;
+    if (IS_GEN8(i965->intel.device_info)) {
+        cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
+        if (!cmd)
+            return;
+        curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
+        memset(cmd, 0, curbe_size);
+
+        if (mbenc_i_frame_dist_in_use) {
+            memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
+        } else {
+            switch (generic_state->frame_type) {
+            case SLICE_TYPE_I:
+                memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
+                break;
+            case SLICE_TYPE_P:
+                memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
+                break;
+            case SLICE_TYPE_B:
+                memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
+                break;
+            default:
+                assert(0);
+            }
+        }
+    } else {
+        assert(0);
+
+        return;
+    }
+
+    me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
+    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+
+    cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
+    cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
+    cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
+    cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
+
+    cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
+    cmd->dw38.max_len_sp = 0;
+
+    cmd->dw3.src_access = 0;
+    cmd->dw3.ref_access = 0;
+
+    if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
+        //disable ftq_override by now.
+        if (avc_state->ftq_override) {
+            cmd->dw3.ftq_enable = avc_state->ftq_enable;
+
+        } else {
+            if (generic_state->frame_type == SLICE_TYPE_P) {
+                cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
+
+            } else {
+                cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
+            }
+        }
+    } else {
+        cmd->dw3.ftq_enable = 0;
+    }
+
+    if (avc_state->disable_sub_mb_partion)
+        cmd->dw3.sub_mb_part_mask = 0x7;
+
+    if (mbenc_i_frame_dist_in_use) {
+        cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
+        cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
+        cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
+        cmd->dw6.batch_buffer_end = 0;
+        cmd->dw31.intra_compute_type = 1;
+    } else {
+        cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
+        cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
+        cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
+
+        {
+            memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
+            if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
+            } else if (avc_state->skip_bias_adjustment_enable) {
+                /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
+                // No need to check for P picture as the flag is only enabled for P picture */
+                cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
+            }
+        }
+        table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
+        memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
+    }
+    cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
+    cmd->dw4.field_parity_flag = 0;//bottom field
+    cmd->dw4.enable_cur_fld_idr = 0;//field realted
+    cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
+    cmd->dw4.hme_enable = generic_state->hme_enabled;
+    cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
+    cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
+
+    cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
+    cmd->dw7.src_field_polarity = 0;//field related
+
+    /*ftq_skip_threshold_lut set,dw14 /15*/
+
+    /*r5 disable NonFTQSkipThresholdLUT*/
+    if (generic_state->frame_type == SLICE_TYPE_P) {
+        cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
+    } else if (generic_state->frame_type == SLICE_TYPE_B) {
+        cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
+    }
+
+    cmd->dw13.qp_prime_y = qp;
+    cmd->dw13.qp_prime_cb = qp;
+    cmd->dw13.qp_prime_cr = qp;
+    cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
+
+    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
+        switch (gen9_avc_multi_pred[preset]) {
+        case 0:
+            cmd->dw32.mult_pred_l0_disable = 128;
+            cmd->dw32.mult_pred_l1_disable = 128;
+            break;
+        case 1:
+            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
+            cmd->dw32.mult_pred_l1_disable = 128;
+            break;
+        case 2:
+            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
+            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
+            break;
+        case 3:
+            cmd->dw32.mult_pred_l0_disable = 1;
+            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
+            break;
+        }
+
+    } else {
+        cmd->dw32.mult_pred_l0_disable = 128;
+        cmd->dw32.mult_pred_l1_disable = 128;
+    }
+
+    if (generic_state->frame_type == SLICE_TYPE_B) {
+        cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
+        cmd->dw34.list1_ref_id0_frm_field_parity = 0;
+        cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
+    }
+
+    cmd->dw34.b_original_bff = 0; //frame only
+    cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
+    cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
+    cmd->dw34.mad_enable_falg = avc_state->mad_enable;
+    cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
+    cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
+    cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
+
+    if (cmd->dw34.force_non_skip_check) {
+        cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
+    }
+
+    cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
+    cmd->dw38.ref_threshold = 400;
+    cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
+    cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;
+
+    if (mbenc_i_frame_dist_in_use) {
+        cmd->dw13.qp_prime_y = 0;
+        cmd->dw13.qp_prime_cb = 0;
+        cmd->dw13.qp_prime_cr = 0;
         cmd->dw33.intra_16x16_nondc_penalty = 0;
         cmd->dw33.intra_8x8_nondc_penalty = 0;
         cmd->dw33.intra_4x4_nondc_penalty = 0;
@@ -6599,7 +7404,8 @@ kernel related function:init/destroy etc
 static void
 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
                              struct generic_encoder_context *generic_context,
-                             struct gen_avc_scaling_context *kernel_context)
+                             struct gen_avc_scaling_context *kernel_context,
+                             int preenc_enabled)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct i965_gpe_table *gpe = &i965->gpe_table;
@@ -6611,8 +7417,14 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx,
     memset(&kernel_param, 0, sizeof(kernel_param));
     if (IS_SKL(i965->intel.device_info) ||
         IS_BXT(i965->intel.device_info)) {
-        kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
-        kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
+        if (!preenc_enabled) {
+            kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
+            kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
+        } else {
+            /* Skylake PreEnc using GEN95/gen10 DS kernel */
+            kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
+            kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
+        }
     } else if (IS_KBL(i965->intel.device_info) ||
                IS_GLK(i965->intel.device_info)) {
         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
@@ -6638,17 +7450,21 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx,
 
     memset(&common_kernel, 0, sizeof(common_kernel));
 
-    intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
-                                         generic_context->enc_kernel_size,
-                                         INTEL_GENERIC_ENC_SCALING4X,
-                                         0,
-                                         &common_kernel);
+    generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+                                                generic_context->enc_kernel_size,
+                                                INTEL_GENERIC_ENC_SCALING4X,
+                                                0,
+                                                &common_kernel);
 
     gpe->load_kernels(ctx,
                       gpe_context,
                       &common_kernel,
                       1);
 
+    /* PreEnc using only the 4X scaling */
+    if (preenc_enabled)
+        return;
+
     /*2x scaling kernel*/
     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
     kernel_param.inline_data_size = 0;
@@ -6666,17 +7482,73 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx,
                                          0,
                                          &common_kernel);
 
-    gpe->load_kernels(ctx,
-                      gpe_context,
-                      &common_kernel,
-                      1);
+    gpe->load_kernels(ctx,
+                      gpe_context,
+                      &common_kernel,
+                      1);
+
+}
+
+static void
+gen9_avc_kernel_init_me(VADriverContextP ctx,
+                        struct generic_encoder_context *generic_context,
+                        struct gen_avc_me_context *kernel_context,
+                        int preenc_enabled)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_gpe_table *gpe = &i965->gpe_table;
+    struct i965_gpe_context *gpe_context = NULL;
+    struct encoder_kernel_parameter kernel_param ;
+    struct encoder_scoreboard_parameter scoreboard_param;
+    struct i965_kernel common_kernel;
+    int i = 0;
+    unsigned int curbe_size = 0;
+
+    if (IS_GEN8(i965->intel.device_info)) {
+        curbe_size = sizeof(gen8_avc_me_curbe_data);
+    } else {
+        if (!preenc_enabled)
+            curbe_size = sizeof(gen9_avc_me_curbe_data);
+        else
+            curbe_size = sizeof(gen9_avc_fei_me_curbe_data);
+    }
+
+    kernel_param.curbe_size = curbe_size;
+    kernel_param.inline_data_size = 0;
+    kernel_param.sampler_size = 0;
+
+    memset(&scoreboard_param, 0, sizeof(scoreboard_param));
+    scoreboard_param.mask = 0xFF;
+    scoreboard_param.enable = generic_context->use_hw_scoreboard;
+    scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
+    scoreboard_param.walkpat_flag = 0;
+
+    /* There is two hme kernel, one for P and other for B frame */
+    for (i = 0; i < 2; i++) {
+        gpe_context = &kernel_context->gpe_contexts[i];
+        gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
+        gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
+
+        memset(&common_kernel, 0, sizeof(common_kernel));
+
+        generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+                                                    generic_context->enc_kernel_size,
+                                                    INTEL_GENERIC_ENC_ME,
+                                                    i,
+                                                    &common_kernel);
+
+        gpe->load_kernels(ctx,
+                          gpe_context,
+                          &common_kernel,
+                          1);
+    }
 
 }
 
 static void
-gen9_avc_kernel_init_me(VADriverContextP ctx,
-                        struct generic_encoder_context *generic_context,
-                        struct gen_avc_me_context *kernel_context)
+gen9_avc_kernel_init_preproc(VADriverContextP ctx,
+                             struct generic_encoder_context *generic_context,
+                             struct gen_avc_preproc_context *kernel_context)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct i965_gpe_table *gpe = &i965->gpe_table;
@@ -6684,13 +7556,8 @@ gen9_avc_kernel_init_me(VADriverContextP ctx,
     struct encoder_kernel_parameter kernel_param ;
     struct encoder_scoreboard_parameter scoreboard_param;
     struct i965_kernel common_kernel;
-    int i = 0;
 
-    if (IS_GEN8(i965->intel.device_info)) {
-        kernel_param.curbe_size = sizeof(gen8_avc_me_curbe_data);
-    } else {
-        kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
-    }
+    kernel_param.curbe_size = sizeof(gen9_avc_preproc_curbe_data);
     kernel_param.inline_data_size = 0;
     kernel_param.sampler_size = 0;
 
@@ -6700,24 +7567,22 @@ gen9_avc_kernel_init_me(VADriverContextP ctx,
     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
     scoreboard_param.walkpat_flag = 0;
 
-    for (i = 0; i < 2; i++) {
-        gpe_context = &kernel_context->gpe_contexts[i];
-        gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
-        gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
+    gpe_context = &kernel_context->gpe_contexts;
+    gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
+    gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
 
-        memset(&common_kernel, 0, sizeof(common_kernel));
+    memset(&common_kernel, 0, sizeof(common_kernel));
 
-        intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+    intel_avc_fei_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
                                              generic_context->enc_kernel_size,
-                                             INTEL_GENERIC_ENC_ME,
-                                             i,
+                                             INTEL_GENERIC_ENC_PREPROC,
+                                             0,
                                              &common_kernel);
 
-        gpe->load_kernels(ctx,
-                          gpe_context,
-                          &common_kernel,
-                          1);
-    }
+    gpe->load_kernels(ctx,
+                      gpe_context,
+                      &common_kernel,
+                      1);
 
 }
 
@@ -6968,6 +7833,8 @@ gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
 
     gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
 
+    gpe->context_destroy(&avc_ctx->context_preproc.gpe_contexts);
+
 }
 
 /*
@@ -7717,6 +8584,391 @@ gen9_avc_vme_pipeline(VADriverContextP ctx,
     return VA_STATUS_SUCCESS;
 }
 
+/* Update PreEnc specific parameters */
+static VAStatus
+gen9_avc_preenc_update_parameters(VADriverContextP ctx,
+                                  VAProfile profile,
+                                  struct encode_state *encode_state,
+                                  struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
+    VAStatsStatisticsParameter *stat_param = NULL;
+    struct object_buffer *obj_buffer = NULL;
+    struct object_buffer *obj_buffer_mv = NULL, *obj_buffer_stat = NULL;
+    struct buffer_store *buffer_store = NULL;
+    unsigned int size = 0, i = 0;
+    unsigned int frame_mb_nums = 0;
+
+    if (!encoder_context->preenc_enabled ||
+        !encode_state->stat_param_ext ||
+        !encode_state->stat_param_ext->buffer)
+        return VA_STATUS_ERROR_OPERATION_FAILED;
+
+    stat_param_h264 = avc_state->stat_param =
+                          (VAStatsStatisticsParameterH264 *)encode_state->stat_param_ext->buffer;
+    stat_param = &stat_param_h264->stats_params;
+
+    /* Assume the frame type based on number of past/future ref frames */
+    if (!stat_param->num_past_references && !stat_param->num_future_references)
+        generic_state->frame_type = SLICE_TYPE_I;
+    else if (stat_param->num_future_references > 0)
+        generic_state->frame_type = SLICE_TYPE_B;
+    else
+        generic_state->frame_type = SLICE_TYPE_P;
+
+    generic_state->preset = INTEL_PRESET_RT_SPEED;
+    generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
+
+    /* frame width and height */
+    generic_state->frame_width_in_pixel = encoder_context->frame_width_in_pixel;
+    generic_state->frame_height_in_pixel = encoder_context->frame_height_in_pixel;
+    generic_state->frame_width_in_mbs = ALIGN(generic_state->frame_width_in_pixel, 16) / 16;
+    generic_state->frame_height_in_mbs = ALIGN(generic_state->frame_height_in_pixel, 16) / 16;
+
+    /* 4x downscaled width and height */
+    generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
+    generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
+    generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
+    generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
+
+    /* reset hme types for preenc */
+    if (generic_state->frame_type != SLICE_TYPE_I)
+        generic_state->hme_enabled = 1;
+
+    /* ensure frame width is not too small */
+    if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
+        generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
+        generic_state->downscaled_width_4x_in_mb =
+            WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
+    }
+
+    /* ensure frame height is not too small*/
+    if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
+        generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
+        generic_state->downscaled_height_4x_in_mb =
+            WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
+    }
+
+    /********** Ensure buffer object parameters ********/
+    frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
+
+    /* mv predictor buffer */
+    if (stat_param_h264->mv_predictor_ctrl) {
+        if (stat_param->mv_predictor == VA_INVALID_ID)
+            goto error;
+        size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
+        obj_buffer = BUFFER(stat_param->mv_predictor);
+        buffer_store = obj_buffer->buffer_store;
+        if (buffer_store->bo->size < size)
+            goto error;
+        if (avc_ctx->preproc_mv_predictor_buffer.bo != NULL)
+            i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
+        i965_dri_object_to_buffer_gpe_resource(
+            &avc_ctx->preproc_mv_predictor_buffer,
+            buffer_store->bo);
+    }
+
+    /* MB qp buffer */
+    if (stat_param_h264->mb_qp) {
+        if (stat_param->qp == VA_INVALID_ID)
+            goto error;
+        size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
+        obj_buffer = BUFFER(stat_param->qp);
+        buffer_store = obj_buffer->buffer_store;
+        if (buffer_store->bo->size < size)
+            goto error;
+        if (avc_ctx->preproc_mb_qp_buffer.bo != NULL)
+            i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
+        i965_dri_object_to_buffer_gpe_resource(
+            &avc_ctx->preproc_mb_qp_buffer,
+            buffer_store->bo);
+    }
+
+    /* locate mv and stat buffer */
+    if (!stat_param_h264->disable_mv_output ||
+        !stat_param_h264->disable_statistics_output) {
+
+        if (!stat_param->outputs)
+            goto error;
+
+        for (i = 0; i < 2 ; i++) {
+            if (stat_param->outputs[i] != VA_INVALID_ID) {
+                obj_buffer = BUFFER(stat_param->outputs[i]);
+                switch (obj_buffer->type) {
+                case VAStatsMVBufferType:
+                    obj_buffer_mv = obj_buffer;
+                    break;
+                case VAStatsStatisticsBufferType:
+                    obj_buffer_stat = obj_buffer;
+                    break;
+                default:
+                    assert(0);
+                }
+            }
+            if (!(!stat_param_h264->disable_mv_output &&
+                  !stat_param_h264->disable_statistics_output))
+                break;
+        }
+    }
+    /* mv data output buffer */
+    if (!stat_param_h264->disable_mv_output) {
+        size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
+        buffer_store = obj_buffer_mv->buffer_store;
+        if (buffer_store->bo->size < size)
+            goto error;
+        if (avc_ctx->preproc_mv_data_out_buffer.bo != NULL)
+            i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
+        i965_dri_object_to_buffer_gpe_resource(
+            &avc_ctx->preproc_mv_data_out_buffer,
+            buffer_store->bo);
+    }
+    /* statistics output buffer */
+    if (!stat_param_h264->disable_statistics_output) {
+        size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
+        buffer_store = obj_buffer_stat->buffer_store;
+        if (buffer_store->bo->size < size)
+            goto error;
+        if (avc_ctx->preproc_stat_data_out_buffer.bo != NULL)
+            i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
+        i965_dri_object_to_buffer_gpe_resource(
+            &avc_ctx->preproc_stat_data_out_buffer,
+            buffer_store->bo);
+    }
+
+    /* past ref stat out buffer */
+    if (stat_param->num_past_references && stat_param->past_ref_stat_buf &&
+        stat_param->past_ref_stat_buf[0] != VA_INVALID_ID) {
+        size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
+        obj_buffer = BUFFER(stat_param->past_ref_stat_buf[0]);
+        buffer_store = obj_buffer->buffer_store;
+        if (buffer_store->bo->size < size)
+            goto error;
+        if (avc_ctx->preenc_past_ref_stat_data_out_buffer.bo != NULL)
+            i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
+        i965_dri_object_to_buffer_gpe_resource(
+            &avc_ctx->preenc_past_ref_stat_data_out_buffer,
+            buffer_store->bo);
+    }
+    /* future ref stat out buffer */
+    if (stat_param->num_past_references && stat_param->future_ref_stat_buf &&
+        stat_param->future_ref_stat_buf[0] != VA_INVALID_ID) {
+        size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
+        obj_buffer = BUFFER(stat_param->future_ref_stat_buf[0]);
+        buffer_store = obj_buffer->buffer_store;
+        if (buffer_store->bo->size < size)
+            goto error;
+        if (avc_ctx->preenc_future_ref_stat_data_out_buffer.bo != NULL)
+            i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
+        i965_dri_object_to_buffer_gpe_resource(
+            &avc_ctx->preenc_future_ref_stat_data_out_buffer,
+            buffer_store->bo);
+    }
+    return VA_STATUS_SUCCESS;
+
+error:
+    return VA_STATUS_ERROR_INVALID_BUFFER;
+}
+
+/* allocate internal resouces required for PreEenc */
+static VAStatus
+gen9_avc_preenc_allocate_internal_resources(VADriverContextP ctx,
+                                            struct encode_state *encode_state,
+                                            struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    unsigned int width  = 0;
+    unsigned int height  = 0;
+    unsigned int size  = 0;
+    int allocate_flag = 1;
+
+    /* 4x MEMV data buffer */
+    width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
+    height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
+    i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
+    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
+                                                  &avc_ctx->s4x_memv_data_buffer,
+                                                  width, height,
+                                                  width,
+                                                  "4x MEMV data buffer");
+    if (!allocate_flag)
+        goto failed_allocation;
+    i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
+
+    /*  Output DISTORTION surface from 4x ME */
+    width = generic_state->downscaled_width_4x_in_mb * 8;
+    height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
+    i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
+    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
+                                                  &avc_ctx->s4x_memv_distortion_buffer,
+                                                  width, height,
+                                                  ALIGN(width, 64),
+                                                  "4x MEMV distortion buffer");
+    if (!allocate_flag)
+        goto failed_allocation;
+    i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
+
+    /* output BRC DISTORTION surface from 4x ME  */
+    width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
+    height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
+    i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
+    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
+                                                  &avc_ctx->res_brc_dist_data_surface,
+                                                  width, height,
+                                                  width,
+                                                  "brc dist data buffer");
+    if (!allocate_flag)
+        goto failed_allocation;
+    i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
+
+
+    /* FTQ Lut buffer,whichs is the mbbrc_const_data_buffer */
+    i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
+    size = 16 * AVC_QP_MAX * 4;
+    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
+                                               &avc_ctx->res_mbbrc_const_data_buffer,
+                                               ALIGN(size, 0x1000),
+                                               "mbbrc const data buffer");
+    if (!allocate_flag)
+        goto failed_allocation;
+    i965_zero_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
+
+    /* 4x downscaled surface  */
+    if (!avc_ctx->preenc_scaled_4x_surface_obj) {
+        i965_CreateSurfaces(ctx,
+                            generic_state->frame_width_4x,
+                            generic_state->frame_height_4x,
+                            VA_RT_FORMAT_YUV420,
+                            1,
+                            &avc_ctx->preenc_scaled_4x_surface_id);
+        avc_ctx->preenc_scaled_4x_surface_obj = SURFACE(avc_ctx->preenc_scaled_4x_surface_id);
+        if (!avc_ctx->preenc_scaled_4x_surface_obj)
+            goto failed_allocation;
+        i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_scaled_4x_surface_obj, 1,
+                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+    }
+
+    /* 4x downscaled past ref surface  */
+    if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj) {
+        i965_CreateSurfaces(ctx,
+                            generic_state->frame_width_4x,
+                            generic_state->frame_height_4x,
+                            VA_RT_FORMAT_YUV420,
+                            1,
+                            &avc_ctx->preenc_past_ref_scaled_4x_surface_id);
+        avc_ctx->preenc_past_ref_scaled_4x_surface_obj =
+            SURFACE(avc_ctx->preenc_past_ref_scaled_4x_surface_id);
+        if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj)
+            goto failed_allocation;
+        i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_past_ref_scaled_4x_surface_obj, 1,
+                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+    }
+
+    /* 4x downscaled future ref surface  */
+    if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj) {
+        i965_CreateSurfaces(ctx,
+                            generic_state->frame_width_4x,
+                            generic_state->frame_height_4x,
+                            VA_RT_FORMAT_YUV420,
+                            1,
+                            &avc_ctx->preenc_future_ref_scaled_4x_surface_id);
+        avc_ctx->preenc_future_ref_scaled_4x_surface_obj =
+            SURFACE(avc_ctx->preenc_future_ref_scaled_4x_surface_id);
+        if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj)
+            goto failed_allocation;
+        i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_future_ref_scaled_4x_surface_obj, 1,
+                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+    }
+
+    /* FeiPreEncFixme: dummy coded buffer. This is a tweak which helps to use
+     * the generic AVC Encdoe codepath which allocate status buffer as extension
+     * to CodedBuffer */
+    if (!avc_ctx->status_buffer.bo) {
+        size =
+            generic_state->frame_width_in_pixel * generic_state->frame_height_in_pixel * 12;
+        size += I965_CODEDBUFFER_HEADER_SIZE;
+        size += 0x1000;
+        avc_ctx->status_buffer.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                 "Dummy Coded Buffer",
+                                                 size, 64);
+    }
+
+    return VA_STATUS_SUCCESS;
+
+failed_allocation:
+    return VA_STATUS_ERROR_ALLOCATION_FAILED;
+}
+
+
+static VAStatus
+gen9_avc_preenc_gpe_kernel_run(VADriverContextP ctx,
+                               struct encode_state *encode_state,
+                               struct intel_encoder_context *encoder_context)
+{
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;;
+    VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
+
+    /* FeiPreEncFixme: Optimize the scaling. Keep a cache of already scaled surfaces
+     * to avoid repeated scaling of same surfaces */
+
+    /* down scaling */
+    gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
+                                   INTEL_ENC_HME_4x, SCALE_CUR_PIC);
+    if (stat_param->num_past_references > 0) {
+        gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
+                                       INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
+    }
+    if (stat_param->num_future_references > 0) {
+        gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
+                                       INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);
+    }
+
+    /* me kernel */
+    if (generic_state->hme_enabled) {
+        gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
+    }
+
+    /* preproc kernel */
+    if (!stat_param_h264->disable_mv_output || !stat_param_h264->disable_statistics_output) {
+        gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);
+    }
+
+    return VA_STATUS_SUCCESS;
+}
+
+static VAStatus
+gen9_avc_preenc_pipeline(VADriverContextP ctx,
+                         VAProfile profile,
+                         struct encode_state *encode_state,
+                         struct intel_encoder_context *encoder_context)
+{
+    VAStatus va_status;
+
+    va_status = gen9_avc_preenc_update_parameters(ctx, profile, encode_state, encoder_context);
+    if (va_status != VA_STATUS_SUCCESS)
+        return va_status;
+
+    va_status = gen9_avc_preenc_allocate_internal_resources(ctx, encode_state, encoder_context);
+    if (va_status != VA_STATUS_SUCCESS)
+        return va_status;
+
+    va_status = gen9_avc_preenc_gpe_kernel_run(ctx, encode_state, encoder_context);
+    if (va_status != VA_STATUS_SUCCESS)
+        return va_status;
+
+    return VA_STATUS_SUCCESS;
+}
+
 static void
 gen9_avc_vme_context_destroy(void * context)
 {
@@ -7757,9 +9009,9 @@ gen8_avc_kernel_init(VADriverContextP ctx,
     generic_ctx->get_kernel_header_and_size = fei_enabled ?
                                               intel_avc_fei_get_kernel_header_and_size :
                                               intel_avc_get_kernel_header_and_size ;
-    gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
+    gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
     gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
-    gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
+    gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
     gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
     gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
 
@@ -7788,18 +9040,19 @@ gen9_avc_kernel_init(VADriverContextP ctx,
     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
     int fei_enabled = encoder_context->fei_enabled;
+    int preenc_enabled = encoder_context->preenc_enabled;
 
-    generic_ctx->get_kernel_header_and_size = fei_enabled ?
+    generic_ctx->get_kernel_header_and_size = (fei_enabled || preenc_enabled) ?
                                               intel_avc_fei_get_kernel_header_and_size :
                                               intel_avc_get_kernel_header_and_size ;
 
-    gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
-                               encoder_context->fei_enabled);
-
-    if (!fei_enabled) {
-        gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
+    if (!fei_enabled && !preenc_enabled) {
+        /* generic AVC Encoder */
+        gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
         gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
-        gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
+        gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
+        gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
+                                   encoder_context->fei_enabled);
         gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
         gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
 
@@ -7829,9 +9082,30 @@ gen9_avc_kernel_init(VADriverContextP ctx,
         else if (IS_KBL(i965->intel.device_info) ||
                  IS_GLK(i965->intel.device_info))
             generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
-    } else {
+
+    } else if (fei_enabled) {
+        /* FEI AVC Encoding */
+        gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
+                                   encoder_context->fei_enabled);
         generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
         generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;
+
+    } else {
+        /* PreEnc for AVC */
+        gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling,
+                                     encoder_context->preenc_enabled);
+        gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me,
+                                encoder_context->preenc_enabled);
+        gen9_avc_kernel_init_preproc(ctx, generic_ctx, &avc_ctx->context_preproc);
+
+        /* preenc 4x scaling uses the gen95 kernel */
+        generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
+        generic_ctx->pfn_set_curbe_me = gen9_avc_preenc_set_curbe_me;
+        generic_ctx->pfn_set_curbe_preproc = gen9_avc_preenc_set_curbe_preproc;
+
+        generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
+        generic_ctx->pfn_send_me_surface = gen9_avc_preenc_send_surface_me;
+        generic_ctx->pfn_send_preproc_surface = gen9_avc_preenc_send_surface_preproc;
     }
 }
 
@@ -9700,10 +10974,12 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
 
     if (IS_SKL(i965->intel.device_info) ||
         IS_BXT(i965->intel.device_info)) {
-        if (!encoder_context->fei_enabled) {
+        if (!encoder_context->fei_enabled && !encoder_context->preenc_enabled) {
             generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
             generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
         } else {
+            /* FEI and PreEnc operation kernels are included in
+            * the monolithic kernel binary */
             generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
             generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
         }
@@ -9747,19 +11023,25 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
     generic_state->downscaled_width_32x_in_mb = 0;
     generic_state->downscaled_height_32x_in_mb = 0;
 
-    if (!encoder_context->fei_enabled) {
-        generic_state->hme_supported = 1;
-        generic_state->b16xme_supported = 1;
-    }
+    generic_state->hme_supported = 1;
     generic_state->b16xme_supported = 1;
     generic_state->b32xme_supported = 0;
     generic_state->hme_enabled = 0;
     generic_state->b16xme_enabled = 0;
     generic_state->b32xme_enabled = 0;
+
+    if (encoder_context->fei_enabled) {
+        /* Disabling HME in FEI encode */
+        generic_state->hme_supported = 0;
+        generic_state->b16xme_supported = 0;
+    } else if (encoder_context->preenc_enabled) {
+        /* Disabling 16x16ME in PreEnc */
+        generic_state->b16xme_supported = 0;
+    }
+
     generic_state->brc_distortion_buffer_supported = 1;
     generic_state->brc_constant_buffer_supported = 0;
 
-
     generic_state->frame_rate = 30;
     generic_state->brc_allocated = 0;
     generic_state->brc_inited = 0;
@@ -9961,7 +11243,10 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
         gen9_avc_kernel_init(ctx, encoder_context);
     }
     encoder_context->vme_context = vme_context;
-    encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
+    /* Handling PreEnc operations separately since it gives better
+     * code readability, avoid possible vme operations mess-up */
+    encoder_context->vme_pipeline =
+        !encoder_context->preenc_enabled ? gen9_avc_vme_pipeline : gen9_avc_preenc_pipeline;
     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
 
     return true;
index d08db91..f30aee3 100644 (file)
@@ -446,10 +446,20 @@ typedef struct _gen9_avc_fei_encoder_kernel_header {
     kernel_header me_p;
     kernel_header me_b;
 
-    /* 2x DownScaling */
+    /* DownScaling */
+    kernel_header ply_dscale_ply;
+    kernel_header ply_dscale_2f_ply_2f;
+
+    /* BRC_I Frame Distortion */
+    kernel_header frame_brc_i_dist;
+
+    // 2x DownScaling
     kernel_header ply_2xdscale_ply;
     kernel_header ply_2xdscale_2f_ply_2f;
 
+    //Weighted Prediction Kernel
+    kernel_header wp;
+
 } gen9_avc_fei_encoder_kernel_header;
 
 /*
@@ -852,193 +862,468 @@ typedef struct _gen9_avc_me_curbe_data {
     } dw38;
 } gen9_avc_me_curbe_data;
 
-#define GEN9_AVC_KERNEL_ME_P_IDX         0
-#define GEN9_AVC_KERNEL_ME_B_IDX         1
-#define NUM_GEN9_AVC_KERNEL_ME           2
-
-struct gen_avc_me_context {
-    struct i965_gpe_context gpe_contexts[NUM_GEN9_AVC_KERNEL_ME];
-};
-
-/*
-frame/mb brc structure and define
-*/
-typedef enum _gen9_avc_binding_table_offset_brc_init_reset {
-    GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX = 0,
-    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX,
-    GEN9_AVC_BRC_INIT_RESET_NUM_SURFACES
-} gen9_avc_binding_table_offset_brc_init_reset;
-
-typedef struct _gen9_avc_brc_init_reset_curbe_data {
+/* FeiPreEncFixme: name change fei to preenc */
+typedef struct _gen9_avc_fei_me_curbe_data {
     struct {
-        uint32_t profile_level_max_frame;
+        uint32_t skip_mode_enable: 1;
+        uint32_t adaptive_enable: 1;
+        uint32_t bi_mix_dis: 1;
+        uint32_t reserved0: 2;
+        uint32_t early_ime_success_enable: 1;
+        uint32_t reserved1: 1;
+        uint32_t t8x8_flag_for_inter_enable: 1;
+        uint32_t reserved2: 16;
+        uint32_t early_ime_stop: 8;
     } dw0;
 
     struct {
-        uint32_t init_buf_full_in_bits;
+        uint32_t max_num_mvs: 6;
+        uint32_t reserved0: 10;
+        uint32_t bi_weight: 6;
+        uint32_t reserved1: 6;
+        uint32_t uni_mix_disable: 1;
+        uint32_t reserved2: 3;
     } dw1;
 
     struct {
-        uint32_t buf_size_in_bits;
+        uint32_t max_len_sp: 8;
+        uint32_t max_num_su: 8;
+        uint32_t reserved0: 16;
     } dw2;
 
     struct {
-        uint32_t average_bit_rate;
+        uint32_t src_size: 2;
+        uint32_t reserved0: 2;
+        uint32_t mb_type_remap: 2;
+        uint32_t src_access: 1;
+        uint32_t ref_access: 1;
+        uint32_t search_ctrl: 3;
+        uint32_t dual_search_path_option: 1;
+        uint32_t sub_pel_mode: 2;
+        uint32_t skip_type: 1;
+        uint32_t disable_field_cache_allocation: 1;
+        uint32_t inter_chroma_mode: 1;
+        uint32_t ft_enable: 1;
+        uint32_t bme_disable_fbr: 1;
+        uint32_t block_based_skip_enable: 1;
+        uint32_t inter_sad: 2;
+        uint32_t intra_sad: 2;
+        uint32_t sub_mb_part_mask: 7;
+        uint32_t reserved1: 1;
     } dw3;
 
     struct {
-        uint32_t max_bit_rate;
+        uint32_t reserved0: 8;
+        uint32_t picture_height_minus1: 8;
+        uint32_t picture_width: 8;
+        uint32_t reserved1: 8;
     } dw4;
 
     struct {
-        uint32_t min_bit_rate;
+        uint32_t reserved0: 8;
+        uint32_t qp_prime_y: 8;
+        uint32_t ref_width: 8;
+        uint32_t ref_height: 8;
     } dw5;
 
     struct {
-        uint32_t frame_rate_m;
+        uint32_t reserved0: 3;
+        uint32_t write_distortions: 1;
+        uint32_t use_mv_from_prev_step: 1;
+        uint32_t reserved1: 3;
+        uint32_t super_combine_dist: 8;
+        uint32_t max_vmvr: 16;
     } dw6;
 
     struct {
-        uint32_t frame_rate_d;
+        uint32_t reserved0: 16;
+        uint32_t mv_cost_scale_factor: 2;
+        uint32_t bilinear_enable: 1;
+        uint32_t src_field_polarity: 1;
+        uint32_t weightedsad_harr: 1;
+        uint32_t ac_only_haar: 1;
+        uint32_t ref_id_cost_mode: 1;
+        uint32_t reserved1: 1;
+        uint32_t skip_center_mask: 8;
     } dw7;
 
     struct {
-        uint32_t brc_flag: 16;
-        uint32_t gop_p: 16;
+        uint32_t mode_0_cost: 8;
+        uint32_t mode_1_cost: 8;
+        uint32_t mode_2_cost: 8;
+        uint32_t mode_3_cost: 8;
     } dw8;
 
     struct {
-        uint32_t gop_b: 16;
-        uint32_t frame_width_in_bytes: 16;
+        uint32_t mode_4_cost: 8;
+        uint32_t mode_5_cost: 8;
+        uint32_t mode_6_cost: 8;
+        uint32_t mode_7_cost: 8;
     } dw9;
 
     struct {
-        uint32_t frame_height_in_bytes: 16;
-        uint32_t avbr_accuracy: 16;
+        uint32_t mode_8_cost: 8;
+        uint32_t mode_9_cost: 8;
+        uint32_t ref_id_cost: 8;
+        uint32_t chroma_intra_mode_cost: 8;
     } dw10;
 
     struct {
-        uint32_t avbr_convergence: 16;
-        uint32_t min_qp: 16;
+        uint32_t mv_0_cost: 8;
+        uint32_t mv_1_cost: 8;
+        uint32_t mv_2_cost: 8;
+        uint32_t mv_3_cost: 8;
     } dw11;
 
     struct {
-        uint32_t max_qp: 16;
-        uint32_t no_slices: 16;
+        uint32_t mv_4_cost: 8;
+        uint32_t mv_5_cost: 8;
+        uint32_t mv_6_cost: 8;
+        uint32_t mv_7_cost: 8;
     } dw12;
 
     struct {
-        uint32_t instant_rate_threshold_0_p: 8;
-        uint32_t instant_rate_threshold_1_p: 8;
-        uint32_t instant_rate_threshold_2_p: 8;
-        uint32_t instant_rate_threshold_3_p: 8;
+        uint32_t num_ref_idx_l0_minus1: 8;
+        uint32_t num_ref_idx_l1_minus1: 8;
+        uint32_t actual_mb_width: 8;
+        uint32_t actual_mb_height: 8;
     } dw13;
 
     struct {
-        uint32_t instant_rate_threshold_0_b: 8;
-        uint32_t instant_rate_threshold_1_b: 8;
-        uint32_t instant_rate_threshold_2_b: 8;
-        uint32_t instant_rate_threshold_3_b: 8;
+        uint32_t l0_ref_pic_polarity_bits: 8;
+        uint32_t l1_ref_pic_polarity_bits: 2;
+        uint32_t reserved: 22;
     } dw14;
 
     struct {
-        uint32_t instant_rate_threshold_0_i: 8;
-        uint32_t instant_rate_threshold_1_i: 8;
-        uint32_t instant_rate_threshold_2_i: 8;
-        uint32_t instant_rate_threshold_3_i: 8;
+        uint32_t prev_mv_read_pos_factor : 8;
+        uint32_t mv_shift_factor : 8;
+        uint32_t reserved: 16;
     } dw15;
 
     struct {
-        uint32_t deviation_threshold_0_pand_b: 8;
-        uint32_t deviation_threshold_1_pand_b: 8;
-        uint32_t deviation_threshold_2_pand_b: 8;
-        uint32_t deviation_threshold_3_pand_b: 8;
+        struct generic_search_path_delta sp_delta_0;
+        struct generic_search_path_delta sp_delta_1;
+        struct generic_search_path_delta sp_delta_2;
+        struct generic_search_path_delta sp_delta_3;
     } dw16;
 
     struct {
-        uint32_t deviation_threshold_4_pand_b: 8;
-        uint32_t deviation_threshold_5_pand_b: 8;
-        uint32_t deviation_threshold_6_pand_b: 8;
-        uint32_t deviation_threshold_7_pand_b: 8;
+        struct generic_search_path_delta sp_delta_4;
+        struct generic_search_path_delta sp_delta_5;
+        struct generic_search_path_delta sp_delta_6;
+        struct generic_search_path_delta sp_delta_7;
     } dw17;
 
     struct {
-        uint32_t deviation_threshold_0_vbr: 8;
-        uint32_t deviation_threshold_1_vbr: 8;
-        uint32_t deviation_threshold_2_vbr: 8;
-        uint32_t deviation_threshold_3_vbr: 8;
+        struct generic_search_path_delta sp_delta_8;
+        struct generic_search_path_delta sp_delta_9;
+        struct generic_search_path_delta sp_delta_10;
+        struct generic_search_path_delta sp_delta_11;
     } dw18;
 
     struct {
-        uint32_t deviation_threshold_4_vbr: 8;
-        uint32_t deviation_threshold_5_vbr: 8;
-        uint32_t deviation_threshold_6_vbr: 8;
-        uint32_t deviation_threshold_7_vbr: 8;
+        struct generic_search_path_delta sp_delta_12;
+        struct generic_search_path_delta sp_delta_13;
+        struct generic_search_path_delta sp_delta_14;
+        struct generic_search_path_delta sp_delta_15;
     } dw19;
 
     struct {
-        uint32_t deviation_threshold_0_i: 8;
-        uint32_t deviation_threshold_1_i: 8;
-        uint32_t deviation_threshold_2_i: 8;
-        uint32_t deviation_threshold_3_i: 8;
+        struct generic_search_path_delta sp_delta_16;
+        struct generic_search_path_delta sp_delta_17;
+        struct generic_search_path_delta sp_delta_18;
+        struct generic_search_path_delta sp_delta_19;
     } dw20;
 
     struct {
-        uint32_t deviation_threshold_4_i: 8;
-        uint32_t deviation_threshold_5_i: 8;
-        uint32_t deviation_threshold_6_i: 8;
-        uint32_t deviation_threshold_7_i: 8;
+        struct generic_search_path_delta sp_delta_20;
+        struct generic_search_path_delta sp_delta_21;
+        struct generic_search_path_delta sp_delta_22;
+        struct generic_search_path_delta sp_delta_23;
     } dw21;
 
     struct {
-        uint32_t initial_qp_i: 8;
-        uint32_t initial_qp_p: 8;
-        uint32_t initial_qp_b: 8;
-        uint32_t sliding_window_size: 8;
+        struct generic_search_path_delta sp_delta_24;
+        struct generic_search_path_delta sp_delta_25;
+        struct generic_search_path_delta sp_delta_26;
+        struct generic_search_path_delta sp_delta_27;
     } dw22;
 
     struct {
-        uint32_t acqp;
+        struct generic_search_path_delta sp_delta_28;
+        struct generic_search_path_delta sp_delta_29;
+        struct generic_search_path_delta sp_delta_30;
+        struct generic_search_path_delta sp_delta_31;
     } dw23;
 
-} gen9_avc_brc_init_reset_curbe_data;
-
-typedef enum _gen9_avc_binding_table_offset_frame_brc_update {
-    GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX                = 0,
-    GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX  = 1,
-    GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX       = 2,
-    GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX      = 3,
-    GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX       = 4,
-    GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX      = 5,
-    GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX             = 6,
-    GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX          = 7,
-    GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX              = 8,
-    GEN9_AVC_FRAME_BRC_UPDATE_NUM_SURFACES_INDEX           = 9
-} gen9_avc_binding_table_offset_frame_brc_update;
-
-typedef struct _gen9_avc_frame_brc_update_curbe_data {
     struct {
-        uint32_t target_size;
-    } dw0;
+        struct generic_search_path_delta sp_delta_32;
+        struct generic_search_path_delta sp_delta_33;
+        struct generic_search_path_delta sp_delta_34;
+        struct generic_search_path_delta sp_delta_35;
+    } dw24;
 
     struct {
-        uint32_t frame_number;
-    } dw1;
+        struct generic_search_path_delta sp_delta_36;
+        struct generic_search_path_delta sp_delta_37;
+        struct generic_search_path_delta sp_delta_38;
+        struct generic_search_path_delta sp_delta_39;
+    } dw25;
 
     struct {
-        uint32_t size_of_pic_headers;
-    } dw2;
+        struct generic_search_path_delta sp_delta_40;
+        struct generic_search_path_delta sp_delta_41;
+        struct generic_search_path_delta sp_delta_42;
+        struct generic_search_path_delta sp_delta_43;
+    } dw26;
 
     struct {
-        uint32_t start_gadj_frame0: 16;
-        uint32_t start_gadj_frame1: 16;
-    } dw3;
+        struct generic_search_path_delta sp_delta_44;
+        struct generic_search_path_delta sp_delta_45;
+        struct generic_search_path_delta sp_delta_46;
+        struct generic_search_path_delta sp_delta_47;
+    } dw27;
 
     struct {
-        uint32_t start_gadj_frame2: 16;
-        uint32_t start_gadj_frame3: 16;
-    } dw4;
-
+        struct generic_search_path_delta sp_delta_48;
+        struct generic_search_path_delta sp_delta_49;
+        struct generic_search_path_delta sp_delta_50;
+        struct generic_search_path_delta sp_delta_51;
+    } dw28;
+
+    struct {
+        struct generic_search_path_delta sp_delta_52;
+        struct generic_search_path_delta sp_delta_53;
+        struct generic_search_path_delta sp_delta_54;
+        struct generic_search_path_delta sp_delta_55;
+    } dw29;
+
+    struct {
+        uint32_t reserved;
+    } dw30;
+
+    struct {
+        uint32_t reserved;
+    } dw31;
+
+    struct {
+        uint32_t _4x_memv_output_data_surf_index;
+    } dw32;
+
+    struct {
+        uint32_t _16x_32x_memv_input_data_surf_index;
+    } dw33;
+
+    struct {
+        uint32_t _4x_me_output_dist_surf_index;
+    } dw34;
+
+    struct {
+        uint32_t _4x_me_output_brc_dist_surf_index;
+    } dw35;
+
+    struct {
+        uint32_t vme_fwd_inter_pred_surf_index;
+    } dw36;
+
+    struct {
+        uint32_t vme_bdw_inter_pred_surf_index;
+    } dw37;
+
+    /* reserved */
+    struct {
+        uint32_t reserved;
+    } dw38;
+} gen9_avc_fei_me_curbe_data;
+
+#define GEN9_AVC_KERNEL_ME_P_IDX         0
+#define GEN9_AVC_KERNEL_ME_B_IDX         1
+#define NUM_GEN9_AVC_KERNEL_ME           2
+
+struct gen_avc_me_context {
+    struct i965_gpe_context gpe_contexts[NUM_GEN9_AVC_KERNEL_ME];
+};
+
+/*
+frame/mb brc structure and define
+*/
+typedef enum _gen9_avc_binding_table_offset_brc_init_reset {
+    GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX = 0,
+    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX,
+    GEN9_AVC_BRC_INIT_RESET_NUM_SURFACES
+} gen9_avc_binding_table_offset_brc_init_reset;
+
+typedef struct _gen9_avc_brc_init_reset_curbe_data {
+    struct {
+        uint32_t profile_level_max_frame;
+    } dw0;
+
+    struct {
+        uint32_t init_buf_full_in_bits;
+    } dw1;
+
+    struct {
+        uint32_t buf_size_in_bits;
+    } dw2;
+
+    struct {
+        uint32_t average_bit_rate;
+    } dw3;
+
+    struct {
+        uint32_t max_bit_rate;
+    } dw4;
+
+    struct {
+        uint32_t min_bit_rate;
+    } dw5;
+
+    struct {
+        uint32_t frame_rate_m;
+    } dw6;
+
+    struct {
+        uint32_t frame_rate_d;
+    } dw7;
+
+    struct {
+        uint32_t brc_flag: 16;
+        uint32_t gop_p: 16;
+    } dw8;
+
+    struct {
+        uint32_t gop_b: 16;
+        uint32_t frame_width_in_bytes: 16;
+    } dw9;
+
+    struct {
+        uint32_t frame_height_in_bytes: 16;
+        uint32_t avbr_accuracy: 16;
+    } dw10;
+
+    struct {
+        uint32_t avbr_convergence: 16;
+        uint32_t min_qp: 16;
+    } dw11;
+
+    struct {
+        uint32_t max_qp: 16;
+        uint32_t no_slices: 16;
+    } dw12;
+
+    struct {
+        uint32_t instant_rate_threshold_0_p: 8;
+        uint32_t instant_rate_threshold_1_p: 8;
+        uint32_t instant_rate_threshold_2_p: 8;
+        uint32_t instant_rate_threshold_3_p: 8;
+    } dw13;
+
+    struct {
+        uint32_t instant_rate_threshold_0_b: 8;
+        uint32_t instant_rate_threshold_1_b: 8;
+        uint32_t instant_rate_threshold_2_b: 8;
+        uint32_t instant_rate_threshold_3_b: 8;
+    } dw14;
+
+    struct {
+        uint32_t instant_rate_threshold_0_i: 8;
+        uint32_t instant_rate_threshold_1_i: 8;
+        uint32_t instant_rate_threshold_2_i: 8;
+        uint32_t instant_rate_threshold_3_i: 8;
+    } dw15;
+
+    struct {
+        uint32_t deviation_threshold_0_pand_b: 8;
+        uint32_t deviation_threshold_1_pand_b: 8;
+        uint32_t deviation_threshold_2_pand_b: 8;
+        uint32_t deviation_threshold_3_pand_b: 8;
+    } dw16;
+
+    struct {
+        uint32_t deviation_threshold_4_pand_b: 8;
+        uint32_t deviation_threshold_5_pand_b: 8;
+        uint32_t deviation_threshold_6_pand_b: 8;
+        uint32_t deviation_threshold_7_pand_b: 8;
+    } dw17;
+
+    struct {
+        uint32_t deviation_threshold_0_vbr: 8;
+        uint32_t deviation_threshold_1_vbr: 8;
+        uint32_t deviation_threshold_2_vbr: 8;
+        uint32_t deviation_threshold_3_vbr: 8;
+    } dw18;
+
+    struct {
+        uint32_t deviation_threshold_4_vbr: 8;
+        uint32_t deviation_threshold_5_vbr: 8;
+        uint32_t deviation_threshold_6_vbr: 8;
+        uint32_t deviation_threshold_7_vbr: 8;
+    } dw19;
+
+    struct {
+        uint32_t deviation_threshold_0_i: 8;
+        uint32_t deviation_threshold_1_i: 8;
+        uint32_t deviation_threshold_2_i: 8;
+        uint32_t deviation_threshold_3_i: 8;
+    } dw20;
+
+    struct {
+        uint32_t deviation_threshold_4_i: 8;
+        uint32_t deviation_threshold_5_i: 8;
+        uint32_t deviation_threshold_6_i: 8;
+        uint32_t deviation_threshold_7_i: 8;
+    } dw21;
+
+    struct {
+        uint32_t initial_qp_i: 8;
+        uint32_t initial_qp_p: 8;
+        uint32_t initial_qp_b: 8;
+        uint32_t sliding_window_size: 8;
+    } dw22;
+
+    struct {
+        uint32_t acqp;
+    } dw23;
+
+} gen9_avc_brc_init_reset_curbe_data;
+
+typedef enum _gen9_avc_binding_table_offset_frame_brc_update {
+    GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX                = 0,
+    GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX  = 1,
+    GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX       = 2,
+    GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX      = 3,
+    GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX       = 4,
+    GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX      = 5,
+    GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX             = 6,
+    GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX          = 7,
+    GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX              = 8,
+    GEN9_AVC_FRAME_BRC_UPDATE_NUM_SURFACES_INDEX           = 9
+} gen9_avc_binding_table_offset_frame_brc_update;
+
+typedef struct _gen9_avc_frame_brc_update_curbe_data {
+    struct {
+        uint32_t target_size;
+    } dw0;
+
+    struct {
+        uint32_t frame_number;
+    } dw1;
+
+    struct {
+        uint32_t size_of_pic_headers;
+    } dw2;
+
+    struct {
+        uint32_t start_gadj_frame0: 16;
+        uint32_t start_gadj_frame1: 16;
+    } dw3;
+
+    struct {
+        uint32_t start_gadj_frame2: 16;
+        uint32_t start_gadj_frame3: 16;
+    } dw4;
+
     struct {
         uint32_t target_size_flag: 8;
         uint32_t brc_flag: 8;
@@ -2923,6 +3208,379 @@ struct gen_avc_sfd_context {
     struct i965_gpe_context gpe_contexts;
 };
 
+struct gen_avc_preproc_context {
+    struct i965_gpe_context gpe_contexts;
+};
+
+/* preproc binding table */
+typedef enum _gen9_avc_binding_table_offset_preproc {
+    GEN9_AVC_PREPROC_CURR_Y_INDEX                 = 0,
+    GEN9_AVC_PREPROC_CURR_UV_INDEX                = 1,
+    GEN9_AVC_PREPROC_HME_MV_DATA_INDEX            = 2,
+    GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX           = 3,
+    GEN9_AVC_PREPROC_MBQP_INDEX                   = 4,
+    GEN9_AVC_PREPROC_MV_DATA_INDEX                = 5,
+    GEN9_AVC_PREPROC_MB_STATS_INDEX               = 6,
+    GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX     = 7,
+    GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX       = 8,
+    GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX     = 9,
+    GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX     = 10,
+    GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX     = 11,
+    GEN9_AVC_PREPROC_RESERVED1_INDEX              = 12,
+    GEN9_AVC_PREPROC_FTQ_LUT_INDEX                = 13,
+    GEN9_AVC_PREPROC_NUM_SURFACES_INDEX           = 14
+} gen9_avc_binding_table_offset_preproc;
+
+/* preenc preproc curbe data */
+typedef struct _gen9_avc_preproc_curbe_data {
+    struct {
+        uint32_t skip_mode_enable: 1;
+        uint32_t adaptive_enable: 1;
+        uint32_t bi_mix_dis: 1;
+        uint32_t reserved0: 2;
+        uint32_t early_ime_success_enable: 1;
+        uint32_t reserved1: 1;
+        uint32_t t8x8_flag_for_inter_enable: 1;
+        uint32_t reserved2: 16;
+        uint32_t early_ime_stop: 8;
+    } dw0;
+
+    struct {
+        uint32_t max_num_mvs: 6;
+        uint32_t reserved0: 10;
+        uint32_t bi_weight: 6;
+        uint32_t reserved1: 6;
+        uint32_t uni_mix_disable: 1;
+        uint32_t reserved2: 3;
+    } dw1;
+
+    struct {
+        uint32_t max_len_sp: 8;
+        uint32_t max_num_su: 8;
+        uint32_t pic_width: 16;
+    } dw2;
+
+    struct {
+        uint32_t src_size: 2;
+        uint32_t reserved0: 2;
+        uint32_t mb_type_remap: 2;
+        uint32_t src_access: 1;
+        uint32_t ref_access: 1;
+        uint32_t search_ctrl: 3;
+        uint32_t dual_search_path_option: 1;
+        uint32_t sub_pel_mode: 2;
+        uint32_t skip_type: 1;
+        uint32_t disable_field_cache_allocation: 1;
+        uint32_t inter_chroma_mode: 1;
+        uint32_t ft_enable: 1;
+        uint32_t bme_disable_fbr: 1;
+        uint32_t block_based_skip_enable: 1;
+        uint32_t inter_sad: 2;
+        uint32_t intra_sad: 2;
+        uint32_t sub_mb_part_mask: 7;
+        uint32_t reserved1: 1;
+    } dw3;
+
+    struct {
+        uint32_t frame_qp: 8;
+        uint32_t per_mb_qp_enable: 1;
+        uint32_t field_parity_flag: 1;
+        uint32_t hme_enable : 1;
+        uint32_t multiple_mv_predictor_per_mb_enable: 2;
+        uint32_t disable_mv_output: 1;
+        uint32_t disable_mb_stats: 1;
+        uint32_t bwd_ref_pic_frame_field_flag: 1;
+        uint32_t fwd_ref_pic_frame_field_flag: 1;
+        uint32_t bwd_ref_pic_field_parity_flag: 1;
+        uint32_t fwd_ref_pic_field_parity_flag: 1;
+        uint32_t curr_pic_field_parity_flag: 1;
+        uint32_t bwd_ref_pic_enable: 1;
+        uint32_t fwd_ref_pic_enable: 1;
+        uint32_t reserved: 10;
+    } dw4;
+
+    struct {
+        uint32_t slice_mb_height: 16;
+        uint32_t ref_width: 8;
+        uint32_t ref_height: 8;
+    } dw5;
+
+    struct {
+        uint32_t pic_height: 16;
+        uint32_t reserved: 16;
+    } dw6;
+
+    struct {
+        uint32_t intra_part_mask: 5;
+        uint32_t non_skip_zmv_added: 1;
+        uint32_t non_skip_mode_added: 1;
+        uint32_t luma_intra_src_corner_swap: 1;
+        uint32_t reserved0: 8;
+        uint32_t mv_cost_scale_factor: 2;
+        uint32_t bilinear_enable: 1;
+        uint32_t src_field_polarity: 1;
+        uint32_t weightedsad_harr: 1;
+        uint32_t ac_only_haar: 1;
+        uint32_t ref_id_cost_mode: 1;
+        uint32_t reserved1: 1;
+        uint32_t skip_center_mask: 8;
+    } dw7;
+
+    struct {
+        uint32_t mode_0_cost: 8;
+        uint32_t mode_1_cost: 8;
+        uint32_t mode_2_cost: 8;
+        uint32_t mode_3_cost: 8;
+    } dw8;
+
+    struct {
+        uint32_t mode_4_cost: 8;
+        uint32_t mode_5_cost: 8;
+        uint32_t mode_6_cost: 8;
+        uint32_t mode_7_cost: 8;
+    } dw9;
+    struct {
+        uint32_t mode_8_cost: 8;
+        uint32_t mode_9_cost: 8;
+        uint32_t ref_id_cost: 8;
+        uint32_t chroma_intra_mode_cost: 8;
+    } dw10;
+
+    struct {
+        uint32_t mv_0_cost: 8;
+        uint32_t mv_1_cost: 8;
+        uint32_t mv_2_cost: 8;
+        uint32_t mv_3_cost: 8;
+    } dw11;
+    struct {
+        uint32_t mv_4_cost: 8;
+        uint32_t mv_5_cost: 8;
+        uint32_t mv_6_cost: 8;
+        uint32_t mv_7_cost: 8;
+    } dw12;
+
+    struct {
+        uint32_t reserved;
+    } dw13;
+
+    struct {
+        uint32_t sic_fwd_trans_coeff_threshold_0: 16;
+        uint32_t sic_fwd_trans_coeff_threshold_1: 8;
+        uint32_t sic_fwd_trans_coeff_threshold_2: 8;
+    } dw14;
+
+    struct {
+        uint32_t sic_fwd_trans_coeff_threshold_3: 8;
+        uint32_t sic_fwd_trans_coeff_threshold_4: 8;
+        uint32_t sic_fwd_trans_coeff_threshold_5: 8;
+        uint32_t sic_fwd_trans_coeff_threshold_6: 8;
+    } dw15;
+
+    struct {
+        struct generic_search_path_delta sp_delta_0;
+        struct generic_search_path_delta sp_delta_1;
+        struct generic_search_path_delta sp_delta_2;
+        struct generic_search_path_delta sp_delta_3;
+    } dw16;
+
+    struct {
+        struct generic_search_path_delta sp_delta_4;
+        struct generic_search_path_delta sp_delta_5;
+        struct generic_search_path_delta sp_delta_6;
+        struct generic_search_path_delta sp_delta_7;
+    } dw17;
+
+    struct {
+        struct generic_search_path_delta sp_delta_8;
+        struct generic_search_path_delta sp_delta_9;
+        struct generic_search_path_delta sp_delta_10;
+        struct generic_search_path_delta sp_delta_11;
+    } dw18;
+    struct {
+        struct generic_search_path_delta sp_delta_12;
+        struct generic_search_path_delta sp_delta_13;
+        struct generic_search_path_delta sp_delta_14;
+        struct generic_search_path_delta sp_delta_15;
+    } dw19;
+
+    struct {
+        struct generic_search_path_delta sp_delta_16;
+        struct generic_search_path_delta sp_delta_17;
+        struct generic_search_path_delta sp_delta_18;
+        struct generic_search_path_delta sp_delta_19;
+    } dw20;
+
+    struct {
+        struct generic_search_path_delta sp_delta_20;
+        struct generic_search_path_delta sp_delta_21;
+        struct generic_search_path_delta sp_delta_22;
+        struct generic_search_path_delta sp_delta_23;
+    } dw21;
+
+
+    struct {
+        struct generic_search_path_delta sp_delta_24;
+        struct generic_search_path_delta sp_delta_25;
+        struct generic_search_path_delta sp_delta_26;
+        struct generic_search_path_delta sp_delta_27;
+    } dw22;
+
+    struct {
+        struct generic_search_path_delta sp_delta_28;
+        struct generic_search_path_delta sp_delta_29;
+        struct generic_search_path_delta sp_delta_30;
+        struct generic_search_path_delta sp_delta_31;
+    } dw23;
+
+    struct {
+        struct generic_search_path_delta sp_delta_32;
+        struct generic_search_path_delta sp_delta_33;
+        struct generic_search_path_delta sp_delta_34;
+        struct generic_search_path_delta sp_delta_35;
+    } dw24;
+
+    struct {
+        struct generic_search_path_delta sp_delta_36;
+        struct generic_search_path_delta sp_delta_37;
+        struct generic_search_path_delta sp_delta_38;
+        struct generic_search_path_delta sp_delta_39;
+    } dw25;
+
+
+    struct {
+        struct generic_search_path_delta sp_delta_40;
+        struct generic_search_path_delta sp_delta_41;
+        struct generic_search_path_delta sp_delta_42;
+        struct generic_search_path_delta sp_delta_43;
+    } dw26;
+
+    struct {
+        struct generic_search_path_delta sp_delta_44;
+        struct generic_search_path_delta sp_delta_45;
+        struct generic_search_path_delta sp_delta_46;
+        struct generic_search_path_delta sp_delta_47;
+    } dw27;
+
+
+    struct {
+        struct generic_search_path_delta sp_delta_48;
+        struct generic_search_path_delta sp_delta_49;
+        struct generic_search_path_delta sp_delta_50;
+        struct generic_search_path_delta sp_delta_51;
+    } dw28;
+
+    struct {
+        struct generic_search_path_delta sp_delta_52;
+        struct generic_search_path_delta sp_delta_53;
+        struct generic_search_path_delta sp_delta_54;
+        struct generic_search_path_delta sp_delta_55;
+    } dw29;
+
+    struct {
+        uint32_t intra_4x4_mode_mask: 8;
+        uint32_t reserved1: 8;
+        uint32_t intra_8x8_mode_mask: 8;
+        uint32_t reserved2: 8;
+    } dw30;
+
+    struct {
+        uint32_t intra_16x16_mode_mask: 4;
+        uint32_t intra_chroma_mode_mask: 4;
+        uint32_t intra_compute_type: 2;
+        uint32_t reserved: 22;
+    } dw31;
+
+    struct {
+        uint32_t max_vmv_r: 16;
+        uint32_t reserved: 16;
+    } dw32;
+
+    struct {
+        uint32_t intra_16x16_non_dc_predPenalty: 8;
+        uint32_t intra_8x8_non_dc_pred_penalty: 8;
+        uint32_t intra_4x4_non_dc_pred_penalty: 8;
+        uint32_t reserved : 8;
+    } dw33;
+
+    struct {
+        uint32_t reserved;
+    } dw34;
+
+    struct {
+        uint32_t reserved;
+    } dw35;
+
+    struct {
+        uint32_t reserved1: 8;
+        uint32_t hme_combined_extra_sus: 8;
+        uint32_t reserved2: 14;
+        uint32_t hme_combine_overlap: 2;
+    } dw36;
+
+    struct {
+        uint32_t skip_mode_enable: 1;
+        uint32_t adaptive_enable: 1;
+        uint32_t bi_mix_disable: 1;
+        uint32_t reserved1: 2;
+        uint32_t early_ime_success_enable: 1;
+        uint32_t reserved2: 1;
+        uint32_t t8x8_flag_for_inter_enable: 1;
+        uint32_t reserved3: 16;
+        uint32_t early_ime_stop: 8;
+    } dw37;
+
+    struct {
+        uint32_t max_len_sp: 8;
+        uint32_t max_num_su: 8;
+        uint32_t ref_threshold: 16;
+    } dw38;
+
+    struct {
+        uint32_t reserved: 8;
+        uint32_t hme_ref_windows_comb_threshold: 8;
+        uint32_t ref_width: 8;
+        uint32_t ref_height: 8;
+    } dw39;
+
+
+    struct {
+        uint32_t curr_pic_surf_index;
+    } dw40;
+
+    struct {
+        uint32_t hme_mv_dat_surf_index;
+    } dw41;
+
+    struct {
+        uint32_t mv_predictor_surf_index;
+    } dw42;
+
+    struct {
+        uint32_t mb_qp_surf_index;
+    } dw43;
+
+    struct {
+        uint32_t mv_data_out_surf_index;
+    } dw44;
+
+    struct {
+        uint32_t mb_stats_out_surf_index;
+    } dw45;
+
+    struct {
+        uint32_t vme_inter_prediction_surf_index;
+    } dw46;
+
+    struct {
+        uint32_t vme_Inter_prediction_mr_surf_index;
+    } dw47;
+
+    struct {
+        uint32_t ftq_lut_surf_index;
+    } dw48;
+
+} gen9_avc_preproc_curbe_data;
+
 /* Gen95 */
 
 typedef struct _gen95_avc_scaling4x_curbe_data {
index d69686f..e16cbed 100644 (file)
@@ -179,6 +179,22 @@ struct i965_avc_encoder_context {
     //ref list
     struct i965_gpe_resource list_reference_res[MAX_MFC_AVC_REFERENCE_SURFACES];
 
+    //preenc downscale surfae
+    VASurfaceID preenc_scaled_4x_surface_id;
+    struct object_surface *preenc_scaled_4x_surface_obj;
+    VASurfaceID preenc_past_ref_scaled_4x_surface_id;
+    struct object_surface *preenc_past_ref_scaled_4x_surface_obj;
+    VASurfaceID preenc_future_ref_scaled_4x_surface_id;
+    struct object_surface *preenc_future_ref_scaled_4x_surface_obj;
+    struct i965_gpe_resource preenc_past_ref_stat_data_out_buffer;
+    struct i965_gpe_resource preenc_future_ref_stat_data_out_buffer;
+
+    // preproc resources
+    struct i965_gpe_resource preproc_mv_predictor_buffer;
+    struct i965_gpe_resource preproc_mb_qp_buffer;
+    struct i965_gpe_resource preproc_mv_data_out_buffer;
+    struct i965_gpe_resource preproc_stat_data_out_buffer;
+
     // kernel context
     struct gen_avc_scaling_context  context_scaling;
     struct gen_avc_me_context  context_me;
@@ -186,6 +202,7 @@ struct i965_avc_encoder_context {
     struct gen_avc_mbenc_context  context_mbenc;
     struct gen_avc_wp_context  context_wp;
     struct gen_avc_sfd_context  context_sfd;
+    struct gen_avc_preproc_context  context_preproc;
 
     struct encoder_status_buffer_internal status_buffer;
 
@@ -199,6 +216,7 @@ struct avc_enc_state {
     VAEncSliceParameterBufferH264    *slice_param[MAX_AVC_SLICE_NUM];
     VAEncMacroblockParameterBufferH264 *mb_param;
     VAEncMiscParameterFEIFrameControlH264 *fei_framectl_param;
+    VAStatsStatisticsParameterH264 *stat_param;
     uint32_t mad_enable: 1;
     //mb skip
     uint32_t mb_disable_skip_map_enable: 1;