OSDN Git Service

configure: change package tarball name to use hyphens
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_avc_encoder.c
old mode 100755 (executable)
new mode 100644 (file)
index 72a8c00..d387621
@@ -1,5 +1,5 @@
 /*
- * Copyright ? 2016 Intel Corporation
+ * Copyright @ 2017 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -19,7 +19,7 @@
  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWAR
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  * Authors:
  *    Pengfei Qu <Pengfei.qu@intel.com>
 #define MAX_URB_SIZE                    4096 /* In register */
 #define NUM_KERNELS_PER_GPE_CONTEXT     1
 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
+#define GPE_RESOURCE_ALIGNMENT 4  /* 4 means 16 (= 1 << 4) */
 
 #define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
         if (bo) {                                                       \
-            OUT_BCS_RELOC(batch,                                        \
+            OUT_BCS_RELOC64(batch,                                        \
                             bo,                                         \
                             I915_GEM_DOMAIN_INSTRUCTION,                \
-                            is_target ? I915_GEM_DOMAIN_INSTRUCTION : 0,     \
+                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                             delta);                                     \
-            OUT_BCS_BATCH(batch, 0);                                    \
         } else {                                                        \
             OUT_BCS_BATCH(batch, 0);                                    \
             OUT_BCS_BATCH(batch, 0);                                    \
@@ -74,7 +74,6 @@
         OUT_BCS_BATCH(batch, attr);                             \
     } while (0)
 
-
 static const uint32_t qm_flat[16] = {
     0x10101010, 0x10101010, 0x10101010, 0x10101010,
     0x10101010, 0x10101010, 0x10101010, 0x10101010,
@@ -93,9 +92,9 @@ static const uint32_t fqm_flat[32] = {
     0x10001000, 0x10001000, 0x10001000, 0x10001000
 };
 
-static unsigned int slice_type_kernel[3] = {1,2,0};
+static const unsigned int slice_type_kernel[3] = {1,2,0};
 
-const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data =
+static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data =
 {
     // unsigned int 0
     {
@@ -253,7 +252,7 @@ const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data
     }
 };
 
-const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data =
+static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data =
 {
     // unsigned int 0
     {
@@ -412,147 +411,85 @@ const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_
 };
 
 static void
-gen9_avc_update_rate_control_parameters(VADriverContextP ctx,
-                                        struct intel_encoder_context *encoder_context,
-                                        VAEncMiscParameterRateControl *misc)
+gen9_avc_update_misc_parameters(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
+    int i;
 
-    generic_state->max_bit_rate = ALIGN(misc->bits_per_second, 1000) / 1000;
-    generic_state->window_size = misc->window_size;
+    /* brc */
+    generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
 
-    if (generic_state->internal_rate_mode == INTEL_BRC_CBR) {
+    generic_state->brc_need_reset = encoder_context->brc.need_reset;
+
+    if (generic_state->internal_rate_mode == VA_RC_CBR) {
         generic_state->min_bit_rate = generic_state->max_bit_rate;
-        generic_state->mb_brc_enabled = misc->rc_flags.bits.mb_rate_control;
+        generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
 
         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
             generic_state->target_bit_rate = generic_state->max_bit_rate;
             generic_state->brc_need_reset = 1;
         }
-    } else if (generic_state->internal_rate_mode == INTEL_BRC_VBR) {
-        generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * misc->target_percentage - 100) / 100;
-        generic_state->mb_brc_enabled = misc->rc_flags.bits.mb_rate_control;
+    } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
+        generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
+        generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
 
-        if (generic_state->target_bit_rate != generic_state->max_bit_rate * misc->target_percentage / 100) {
-            generic_state->target_bit_rate = generic_state->max_bit_rate * misc->target_percentage / 100;
+        if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
+            generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
             generic_state->brc_need_reset = 1;
         }
     }
-}
-
-static void
-gen9_avc_update_hrd_parameters(VADriverContextP ctx,
-                               struct intel_encoder_context *encoder_context,
-                               VAEncMiscParameterHRD *misc)
-{
-    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
-
-    if (generic_state->internal_rate_mode == INTEL_BRC_CQP)
-        return;
-
-    generic_state->vbv_buffer_size_in_bit = misc->buffer_size;
-    generic_state->init_vbv_buffer_fullness_in_bit = misc->initial_buffer_fullness;
-}
-
-static void
-gen9_avc_update_framerate_parameters(VADriverContextP ctx,
-                                     struct intel_encoder_context *encoder_context,
-                                     VAEncMiscParameterFrameRate *misc)
-{
-    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
-
-    generic_state->frames_per_100s = misc->framerate * 100; /* misc->framerate is multiple of 100 */
-    generic_state->frame_rate = misc->framerate ;
-}
 
-static void
-gen9_avc_update_roi_parameters(VADriverContextP ctx,
-                               struct intel_encoder_context *encoder_context,
-                               VAEncMiscParameterBufferROI *misc)
-{
-    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
-    int i;
-
-    if (!misc || !misc->roi) {
-        generic_state->num_roi = 0;
-        return;
+    /*  frame rate */
+    if (generic_state->internal_rate_mode != VA_RC_CQP)
+    {
+        generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100/encoder_context->brc.framerate[0].den ;
+        generic_state->frame_rate = encoder_context->brc.framerate[0].num/encoder_context->brc.framerate[0].den ;
+        generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate /1000);// brc.windows size in ms as the unit
+    }else
+    {
+        generic_state->frames_per_100s = 30 * 100;
+        generic_state->frame_rate = 30 ;
+        generic_state->frames_per_window_size = 30;
     }
 
-    generic_state->num_roi = MIN(misc->num_roi, 3);
-    generic_state->max_delta_qp = misc->max_delta_qp;
-    generic_state->min_delta_qp = misc->min_delta_qp;
-
-    for (i = 0; i < generic_state->num_roi; i++) {
-        generic_state->roi[i].left = misc->roi->roi_rectangle.x;
-        generic_state->roi[i].right = generic_state->roi[i].left + misc->roi->roi_rectangle.width;
-        generic_state->roi[i].top = misc->roi->roi_rectangle.y;
-        generic_state->roi[i].bottom = generic_state->roi[i].top + misc->roi->roi_rectangle.height;
-        generic_state->roi[i].value = misc->roi->roi_value;
-
-        generic_state->roi[i].left /= 16;
-        generic_state->roi[i].right /= 16;
-        generic_state->roi[i].top /= 16;
-        generic_state->roi[i].bottom /= 16;
+    /*  HRD */
+    if (generic_state->internal_rate_mode != VA_RC_CQP)
+    {
+        generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
+        generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
     }
-}
 
-static void
-gen9_avc_update_misc_parameters(VADriverContextP ctx,
-                                struct encode_state *encode_state,
-                                struct intel_encoder_context *encoder_context)
-{
-    int i,j;
-    VAEncMiscParameterBuffer *misc_param;
-
-    for (i = 0; i < ARRAY_ELEMS(encode_state->misc_param); i++) {
-        for (j = 0; j < ARRAY_ELEMS(encode_state->misc_param[0]); j++) {
-            if (!encode_state->misc_param[i][j] || !encode_state->misc_param[i][j]->buffer)
-                continue;
-
-            misc_param = (VAEncMiscParameterBuffer *)encode_state->misc_param[i][0]->buffer;
-
-            switch (misc_param->type) {
-            case VAEncMiscParameterTypeFrameRate:
-                gen9_avc_update_framerate_parameters(ctx,
-                                                     encoder_context,
-                                                     (VAEncMiscParameterFrameRate *)misc_param->data);
-                break;
+    /* ROI */
+    generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
+    if (generic_state->num_roi > 0) {
+        generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
+        generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
 
-            case VAEncMiscParameterTypeRateControl:
-                gen9_avc_update_rate_control_parameters(ctx,
-                                                        encoder_context,
-                                                        (VAEncMiscParameterRateControl *)misc_param->data);
-                break;
-
-            case VAEncMiscParameterTypeHRD:
-                gen9_avc_update_hrd_parameters(ctx,
-                                               encoder_context,
-                                               (VAEncMiscParameterHRD *)misc_param->data);
-                break;
-
-            case VAEncMiscParameterTypeROI:
-                gen9_avc_update_roi_parameters(ctx,
-                                               encoder_context,
-                                               (VAEncMiscParameterBufferROI *)misc_param->data);
-                break;
+        for (i = 0; i < generic_state->num_roi; i++) {
+            generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
+            generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
+            generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
+            generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
+            generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
 
-            default:
-                break;
-            }
+            generic_state->roi[i].left /= 16;
+            generic_state->roi[i].right /= 16;
+            generic_state->roi[i].top /= 16;
+            generic_state->roi[i].bottom /= 16;
         }
     }
+
 }
 
 static bool
-intel_avc_get_kernel_header_and_size(void                             *pvbinary,
-                                     int                              binary_size,
-                                     INTEL_GENERIC_ENC_OPERATION      operation,
-                                     int                              krnstate_idx,
-                                     struct i965_kernel               *ret_kernel)
+intel_avc_get_kernel_header_and_size(void *pvbinary,
+                                     int binary_size,
+                                     INTEL_GENERIC_ENC_OPERATION operation,
+                                     int krnstate_idx,
+                                     struct i965_kernel *ret_kernel)
 {
     typedef uint32_t BIN_PTR[4];
 
@@ -683,12 +620,6 @@ gen9_avc_init_check_surfaces(VADriverContextP ctx,
     if (!obj_surface || !obj_surface->bo)
         return VA_STATUS_ERROR_INVALID_SURFACE;
 
-    if (obj_surface->private_data &&
-        obj_surface->free_private_data != gen9_free_surfaces_avc) {
-        obj_surface->free_private_data(&obj_surface->private_data);
-        obj_surface->private_data = NULL;
-    }
-
     if (obj_surface->private_data) {
         return VA_STATUS_SUCCESS;
     }
@@ -738,22 +669,26 @@ gen9_avc_init_check_surfaces(VADriverContextP ctx,
     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
 
-    downscaled_width_32x = generic_state->frame_width_32x;
-    downscaled_height_32x = generic_state->frame_height_32x;
-    i965_CreateSurfaces(ctx,
-                        downscaled_width_32x,
-                        downscaled_height_32x,
-                        VA_RT_FORMAT_YUV420,
-                        1,
-                        &avc_surface->scaled_32x_surface_id);
-    avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
+    if(generic_state->b32xme_supported ||
+       generic_state->b32xme_enabled)
+    {
+        downscaled_width_32x = generic_state->frame_width_32x;
+        downscaled_height_32x = generic_state->frame_height_32x;
+        i965_CreateSurfaces(ctx,
+                            downscaled_width_32x,
+                            downscaled_height_32x,
+                            VA_RT_FORMAT_YUV420,
+                            1,
+                            &avc_surface->scaled_32x_surface_id);
+        avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
 
-    if (!avc_surface->scaled_32x_surface_obj) {
-        return VA_STATUS_ERROR_ALLOCATION_FAILED;
-    }
+        if (!avc_surface->scaled_32x_surface_obj) {
+            return VA_STATUS_ERROR_ALLOCATION_FAILED;
+        }
 
-    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
-                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+        i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
+                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+    }
 
     /*mb code and mv data for each frame*/
     size = frame_mb_nums * 16 * 4;
@@ -813,7 +748,7 @@ gen9_avc_allocate_resources(VADriverContextP ctx,
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
     unsigned int size  = 0;
@@ -835,17 +770,6 @@ gen9_avc_allocate_resources(VADriverContextP ctx,
     if (!allocate_flag)
         goto failed_allocation;
 
-    i965_free_gpe_resource(&avc_ctx->res_slice_batch_buffer_2nd_level);
-    /* include (dw) 2* (ref_id + weight_state + pak_insert_obj) + slice state(11) + slice/pps/sps headers, no mb code size
-       2*(10 + 98 + X) + 11*/
-    size = 4096 + (320 * 4 + 80 + 16) * encode_state->num_slice_params_ext;
-    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
-                             &avc_ctx->res_slice_batch_buffer_2nd_level,
-                             ALIGN(size,0x1000),
-                             "second levle batch (slice) buffer");
-    if (!allocate_flag)
-        goto failed_allocation;
-
     /* scaling related surface   */
     if(avc_state->mb_status_supported)
     {
@@ -1059,17 +983,6 @@ gen9_avc_allocate_resources(VADriverContextP ctx,
             goto failed_allocation;
     }
 
-
-    /* maybe it is not needed by now. it is used in crypt mode*/
-    i965_free_gpe_resource(&avc_ctx->res_brc_mbenc_curbe_write_buffer);
-    size = ALIGN(sizeof(gen9_avc_mbenc_curbe_data), 64) + ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) ;//* NUM_GEN9_AVC_KERNEL_MBENC;
-    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
-                             &avc_ctx->res_brc_mbenc_curbe_write_buffer,
-                             size,
-                             "mbenc curbe data buffer");
-    if (!allocate_flag)
-        goto failed_allocation;
-
     /*     mbenc related surface. it share most of surface with other kernels     */
     if(avc_state->arbitrary_num_mbs_in_slice)
     {
@@ -1153,9 +1066,9 @@ gen9_avc_allocate_resources(VADriverContextP ctx,
                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
         }
         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
-        i965_object_surface_to_2d_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0]);
+        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0],GPE_RESOURCE_ALIGNMENT);
         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
-        i965_object_surface_to_2d_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1]);
+        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1],GPE_RESOURCE_ALIGNMENT);
     }
 
     /* other   */
@@ -1181,13 +1094,12 @@ gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
     if(!vme_context)
         return;
 
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     VADriverContextP ctx = avc_ctx->ctx;
     int i = 0;
 
     /* free all the surface/buffer here*/
     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
-    i965_free_gpe_resource(&avc_ctx->res_slice_batch_buffer_2nd_level);
     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
@@ -1205,7 +1117,6 @@ gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
-    i965_free_gpe_resource(&avc_ctx->res_brc_mbenc_curbe_write_buffer);
     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
@@ -1233,7 +1144,7 @@ gen9_avc_run_kernel_media_object(VADriverContextP ctx,
                              struct gpe_media_object_parameter *param)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
 
     struct intel_batchbuffer *batch = encoder_context->base.batch;
     struct encoder_status_buffer_internal *status_buffer;
@@ -1243,6 +1154,7 @@ gen9_avc_run_kernel_media_object(VADriverContextP ctx,
         return;
 
     intel_batchbuffer_start_atomic(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
 
     status_buffer = &(avc_ctx->status_buffer);
     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
@@ -1251,7 +1163,6 @@ gen9_avc_run_kernel_media_object(VADriverContextP ctx,
     mi_store_data_imm.dw0 = media_function;
     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
 
-    intel_batchbuffer_emit_mi_flush(batch);
     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
     gen8_gpe_media_object(ctx, gpe_context, batch, param);
     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
@@ -1271,7 +1182,7 @@ gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
                                     struct gpe_media_object_walker_parameter *param)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
 
     struct intel_batchbuffer *batch = encoder_context->base.batch;
     struct encoder_status_buffer_internal *status_buffer;
@@ -1532,7 +1443,7 @@ gen9_avc_kernel_scaling(VADriverContextP ctx,
                         int hme_type)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
@@ -1675,7 +1586,7 @@ gen9_avc_kernel_scaling(VADriverContextP ctx,
 
     generic_ctx->pfn_send_scaling_surface(ctx,encode_state,gpe_context,encoder_context,&surface_param);
 
-
+    /* setup the interface data */
     gen8_gpe_setup_interface_data(ctx, gpe_context);
 
     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
@@ -1700,6 +1611,7 @@ gen9_avc_kernel_scaling(VADriverContextP ctx,
 
     return VA_STATUS_SUCCESS;
 }
+
 /*
 frame/mb brc related function
 */
@@ -1733,7 +1645,7 @@ gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
     pstate->dw3.image_structure = 0;//frame is zero
     pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
     pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
-    pstate->dw3.brc_domain_rate_control_enable = 0;//1,set for vdenc;
+    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
     pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
     pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
 
@@ -1756,7 +1668,7 @@ gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
     pstate->dw5.frame_size_over_flag = 1;
     pstate->dw5.frame_size_under_flag = 1;
     pstate->dw5.intra_mb_ipcm_flag = 1;
-    pstate->dw5.mb_rate_ctrl_flag = 0;             /* Always 0 in VDEnc mode */
+    pstate->dw5.mb_rate_ctrl_flag = 0;
     pstate->dw5.non_first_pass_flag = 0;
     pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
     pstate->dw5.aq_chroma_disable = 1;
@@ -1793,7 +1705,7 @@ gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
     pstate->dw11.frame_bitrate_max_delta = 0;
 
     pstate->dw12.vad_error_logic = 1;
-    /* TODO: set paramters DW19/DW20 for slices */
+    /* set parameters DW19/DW20 for slices */
 }
 
 void gen9_avc_set_image_state(VADriverContextP ctx,
@@ -1881,7 +1793,7 @@ gen9_avc_init_brc_const_data(VADriverContextP ctx,
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
 
@@ -2039,7 +1951,7 @@ gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
                                  struct intel_encoder_context *encoder_context)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
 
@@ -2178,7 +2090,7 @@ gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
     if(seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR )
     {
         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
-        if(generic_state->internal_rate_mode == INTEL_BRC_CBR)
+        if(generic_state->internal_rate_mode == VA_RC_CBR)
         {
             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
 
@@ -2191,12 +2103,12 @@ gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled)? 0 : 0x8000;
 
 
-    if(generic_state->internal_rate_mode == INTEL_BRC_CBR)
+    if(generic_state->internal_rate_mode == VA_RC_CBR)
     { //CBR
         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
         cmd->dw8.brc_flag = cmd->dw8.brc_flag |INTEL_ENCODE_BRCINIT_ISCBR;
 
-    }else if(generic_state->internal_rate_mode == INTEL_BRC_VBR)
+    }else if(generic_state->internal_rate_mode == VA_RC_VBR)
     {//VBR
         if(cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate)
         {
@@ -2273,7 +2185,7 @@ gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
 
-    cmd->dw22.sliding_window_size = generic_state->window_size;
+    cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
 
     i965_gpe_context_unmap_curbe(gpe_context);
 
@@ -2288,7 +2200,7 @@ gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
                                      void * param_mbenc)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
 
     gen9_add_buffer_gpe_surface(ctx,
                                 gpe_context,
@@ -2314,7 +2226,7 @@ gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
                                struct intel_encoder_context *encoder_context)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
 
@@ -2475,7 +2387,7 @@ gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
                                        void * param_brc)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct brc_param * param = (struct brc_param *)param_brc ;
     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
 
@@ -2569,7 +2481,7 @@ gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
 
@@ -2724,7 +2636,7 @@ gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
                                     void * param_mbenc)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
 
     /* brc history buffer*/
@@ -2780,7 +2692,7 @@ gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
 
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
 
@@ -2952,7 +2864,7 @@ gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
                         struct intel_encoder_context *encoder_context)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
@@ -3471,7 +3383,7 @@ gen9_avc_send_surface_mbenc(VADriverContextP ctx,
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
     struct object_surface *obj_surface;
@@ -3806,7 +3718,7 @@ gen9_avc_kernel_mbenc(VADriverContextP ctx,
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
 
@@ -4107,7 +4019,7 @@ gen9_avc_send_surface_me(VADriverContextP ctx,
 
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
 
     struct object_surface *obj_surface, *input_surface;
@@ -4344,7 +4256,7 @@ gen9_avc_kernel_me(VADriverContextP ctx,
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
 
     struct i965_gpe_context *gpe_context;
@@ -4464,7 +4376,7 @@ gen9_avc_send_surface_wp(VADriverContextP ctx,
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
     struct wp_param * curbe_param = (struct wp_param *)param;
     struct object_surface *obj_surface;
@@ -4510,7 +4422,7 @@ gen9_avc_kernel_wp(VADriverContextP ctx,
                    unsigned int list1_in_use)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
 
@@ -4575,14 +4487,14 @@ gen9_avc_set_curbe_sfd(VADriverContextP ctx,
     memset(cmd,0,sizeof(gen9_avc_sfd_curbe_data));
 
     cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
-    cmd->dw0.enable_adaptive_mv_stream_in = 0 ; //vdenc
-    cmd->dw0.stream_in_type = 7 ;             //vdenc
+    cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
+    cmd->dw0.stream_in_type = 7 ;
     cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
     cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
     cmd->dw0.vdenc_mode_disable = 1 ;
 
     cmd->dw1.hme_stream_in_ref_cost = 5 ;
-    cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;//vdenc
+    cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
     cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
 
     cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
@@ -4623,7 +4535,7 @@ gen9_avc_send_surface_sfd(VADriverContextP ctx,
                           void * param)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct i965_gpe_resource *gpe_resource;
     int size = 0;
 
@@ -4661,7 +4573,7 @@ gen9_avc_kernel_sfd(VADriverContextP ctx,
                     struct intel_encoder_context *encoder_context)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
 
     struct i965_gpe_context *gpe_context;
@@ -4700,7 +4612,7 @@ kernel related function:init/destroy etc
 static void
 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
                              struct generic_encoder_context *generic_context,
-                             struct gen9_avc_scaling_context *kernel_context)
+                             struct gen_avc_scaling_context *kernel_context)
 {
     struct i965_gpe_context *gpe_context = NULL;
     struct encoder_kernel_parameter kernel_param ;
@@ -4724,8 +4636,8 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx,
 
     memset(&common_kernel, 0, sizeof(common_kernel));
 
-    intel_avc_get_kernel_header_and_size((void *)media_avc_kernels,
-                                         sizeof(media_avc_kernels),
+    intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+                                         generic_context->enc_kernel_size,
                                          INTEL_GENERIC_ENC_SCALING4X,
                                          0,
                                          &common_kernel);
@@ -4746,8 +4658,8 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx,
 
     memset(&common_kernel, 0, sizeof(common_kernel));
 
-    intel_avc_get_kernel_header_and_size((void *)media_avc_kernels,
-                                         sizeof(media_avc_kernels),
+    intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+                                         generic_context->enc_kernel_size,
                                          INTEL_GENERIC_ENC_SCALING2X,
                                          0,
                                          &common_kernel);
@@ -4762,7 +4674,7 @@ gen9_avc_kernel_init_scaling(VADriverContextP ctx,
 static void
 gen9_avc_kernel_init_me(VADriverContextP ctx,
                         struct generic_encoder_context *generic_context,
-                        struct gen9_avc_me_context *kernel_context)
+                        struct gen_avc_me_context *kernel_context)
 {
     struct i965_gpe_context *gpe_context = NULL;
     struct encoder_kernel_parameter kernel_param ;
@@ -4787,8 +4699,8 @@ gen9_avc_kernel_init_me(VADriverContextP ctx,
 
         memset(&common_kernel, 0, sizeof(common_kernel));
 
-        intel_avc_get_kernel_header_and_size((void *)media_avc_kernels,
-                                             sizeof(media_avc_kernels),
+        intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+                                             generic_context->enc_kernel_size,
                                              INTEL_GENERIC_ENC_ME,
                                              i,
                                              &common_kernel);
@@ -4804,7 +4716,7 @@ gen9_avc_kernel_init_me(VADriverContextP ctx,
 static void
 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
                            struct generic_encoder_context *generic_context,
-                           struct gen9_avc_mbenc_context *kernel_context)
+                           struct gen_avc_mbenc_context *kernel_context)
 {
     struct i965_gpe_context *gpe_context = NULL;
     struct encoder_kernel_parameter kernel_param ;
@@ -4829,8 +4741,8 @@ gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
 
         memset(&common_kernel, 0, sizeof(common_kernel));
 
-        intel_avc_get_kernel_header_and_size((void *)media_avc_kernels,
-                                             sizeof(media_avc_kernels),
+        intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+                                             generic_context->enc_kernel_size,
                                              INTEL_GENERIC_ENC_MBENC,
                                              i,
                                              &common_kernel);
@@ -4846,7 +4758,7 @@ gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
 static void
 gen9_avc_kernel_init_brc(VADriverContextP ctx,
                          struct generic_encoder_context *generic_context,
-                         struct gen9_avc_brc_context *kernel_context)
+                         struct gen_avc_brc_context *kernel_context)
 {
     struct i965_gpe_context *gpe_context = NULL;
     struct encoder_kernel_parameter kernel_param ;
@@ -4880,8 +4792,8 @@ gen9_avc_kernel_init_brc(VADriverContextP ctx,
 
         memset(&common_kernel, 0, sizeof(common_kernel));
 
-        intel_avc_get_kernel_header_and_size((void *)media_avc_kernels,
-                                             sizeof(media_avc_kernels),
+        intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+                                             generic_context->enc_kernel_size,
                                              INTEL_GENERIC_ENC_BRC,
                                              i,
                                              &common_kernel);
@@ -4897,7 +4809,7 @@ gen9_avc_kernel_init_brc(VADriverContextP ctx,
 static void
 gen9_avc_kernel_init_wp(VADriverContextP ctx,
                         struct generic_encoder_context *generic_context,
-                        struct gen9_avc_wp_context *kernel_context)
+                        struct gen_avc_wp_context *kernel_context)
 {
     struct i965_gpe_context *gpe_context = NULL;
     struct encoder_kernel_parameter kernel_param ;
@@ -4920,8 +4832,8 @@ gen9_avc_kernel_init_wp(VADriverContextP ctx,
 
     memset(&common_kernel, 0, sizeof(common_kernel));
 
-    intel_avc_get_kernel_header_and_size((void *)media_avc_kernels,
-                                         sizeof(media_avc_kernels),
+    intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+                                         generic_context->enc_kernel_size,
                                          INTEL_GENERIC_ENC_WP,
                                          0,
                                          &common_kernel);
@@ -4936,7 +4848,7 @@ gen9_avc_kernel_init_wp(VADriverContextP ctx,
 static void
 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
                          struct generic_encoder_context *generic_context,
-                         struct gen9_avc_sfd_context *kernel_context)
+                         struct gen_avc_sfd_context *kernel_context)
 {
     struct i965_gpe_context *gpe_context = NULL;
     struct encoder_kernel_parameter kernel_param ;
@@ -4959,8 +4871,8 @@ gen9_avc_kernel_init_sfd(VADriverContextP ctx,
 
     memset(&common_kernel, 0, sizeof(common_kernel));
 
-    intel_avc_get_kernel_header_and_size((void *)media_avc_kernels,
-                                         sizeof(media_avc_kernels),
+    intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
+                                         generic_context->enc_kernel_size,
                                          INTEL_GENERIC_ENC_SFD,
                                          0,
                                          &common_kernel);
@@ -4976,7 +4888,7 @@ static void
 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
 {
 
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
 
     int i = 0;
 
@@ -5080,7 +4992,7 @@ gen9_avc_update_parameters(VADriverContextP ctx,
     generic_state->gop_size = seq_param->intra_period;
     generic_state->gop_ref_distance = seq_param->ip_period;
 
-    if (generic_state->internal_rate_mode == INTEL_BRC_CBR) {
+    if (generic_state->internal_rate_mode == VA_RC_CBR) {
         generic_state->max_bit_rate = generic_state->target_bit_rate;
         generic_state->min_bit_rate = generic_state->target_bit_rate;
     }
@@ -5109,8 +5021,8 @@ gen9_avc_update_parameters(VADriverContextP ctx,
     generic_state->curr_pak_pass = 0;
     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
 
-    if (generic_state->internal_rate_mode == INTEL_BRC_CBR ||
-        generic_state->internal_rate_mode == INTEL_BRC_VBR)
+    if (generic_state->internal_rate_mode == VA_RC_CBR ||
+        generic_state->internal_rate_mode == VA_RC_VBR)
         generic_state->brc_enabled = 1;
     else
         generic_state->brc_enabled = 0;
@@ -5132,7 +5044,7 @@ gen9_avc_update_parameters(VADriverContextP ctx,
         generic_state->min_bit_rate = 0;
         generic_state->init_vbv_buffer_fullness_in_bit = 0;
         generic_state->vbv_buffer_size_in_bit = 0;
-        generic_state->num_pak_passes = 2;
+        generic_state->num_pak_passes = 1;
     } else {
         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
     }
@@ -5241,16 +5153,16 @@ gen9_avc_encode_check_parameter(VADriverContextP ctx,
 
     switch (rate_control_mode & 0x7f) {
     case VA_RC_CBR:
-        generic_state->internal_rate_mode = INTEL_BRC_CBR;
+        generic_state->internal_rate_mode = VA_RC_CBR;
         break;
 
     case VA_RC_VBR:
-        generic_state->internal_rate_mode = INTEL_BRC_VBR;
+        generic_state->internal_rate_mode = VA_RC_VBR;
         break;
 
     case VA_RC_CQP:
     default:
-        generic_state->internal_rate_mode = INTEL_BRC_CQP;
+        generic_state->internal_rate_mode = VA_RC_CQP;
         break;
     }
 
@@ -5281,12 +5193,12 @@ gen9_avc_encode_check_parameter(VADriverContextP ctx,
         avc_state->sfd_enable = 0;
     }
 
-    if(generic_state->window_size == 0)
+    if(generic_state->frames_per_window_size == 0)
     {
-        generic_state->window_size = (generic_state->frames_per_100s/100 < 60)?(generic_state->frames_per_100s/100):60;
-    }else if(generic_state->window_size > 2 * generic_state->frames_per_100s/100)
+        generic_state->frames_per_window_size = (generic_state->frames_per_100s/100 < 60)?(generic_state->frames_per_100s/100):60;
+    }else if(generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s/100)
     {
-        generic_state->window_size = (generic_state->frames_per_100s/100 < 60)?(generic_state->frames_per_100s/100):60;
+        generic_state->frames_per_window_size = (generic_state->frames_per_100s/100 < 60)?(generic_state->frames_per_100s/100):60;
     }
 
     if(generic_state->brc_enabled)
@@ -5300,7 +5212,7 @@ gen9_avc_encode_check_parameter(VADriverContextP ctx,
         generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
     }else
     {
-        generic_state->num_pak_passes = 2;// CQP only one pass
+        generic_state->num_pak_passes = 1;// CQP only one pass
     }
 
     avc_state->mbenc_i_frame_dist_in_use = 0;
@@ -5395,7 +5307,8 @@ gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
 {
     VAStatus va_status;
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
 
@@ -5417,6 +5330,7 @@ gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
 
     if (va_status != VA_STATUS_SUCCESS)
         return va_status;
+
     memset(&surface_param,0,sizeof(surface_param));
     surface_param.frame_width = generic_state->frame_width_in_pixel;
     surface_param.frame_height = generic_state->frame_height_in_pixel;
@@ -5435,8 +5349,6 @@ gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-2],avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1],avc_priv_surface->dmv_bottom);
-       dri_bo_reference(avc_priv_surface->dmv_top);
-       dri_bo_reference(avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
@@ -5445,8 +5357,8 @@ gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-1] = avc_priv_surface->top_field_order_cnt + 1;
     }
-    i965_free_gpe_resource(&avc_ctx->res_reconstructed_surface);
-    i965_object_surface_to_2d_gpe_resource(&avc_ctx->res_reconstructed_surface, obj_surface);
+    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
+    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);
 
     /* input YUV surface*/
     obj_surface = encode_state->input_yuv_object;
@@ -5454,8 +5366,8 @@ gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
 
     if (va_status != VA_STATUS_SUCCESS)
         return va_status;
-    i965_free_gpe_resource(&avc_ctx->res_uncompressed_input_surface);
-    i965_object_surface_to_2d_gpe_resource(&avc_ctx->res_uncompressed_input_surface, obj_surface);
+    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
+    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);
 
     /* Reference surfaces */
     for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
@@ -5467,7 +5379,7 @@ gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
         avc_state->top_field_poc[2*i+1] = 0;
 
         if (obj_surface && obj_surface->bo) {
-            i965_object_surface_to_2d_gpe_resource(&avc_ctx->list_reference_res[i], obj_surface);
+            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface,GPE_RESOURCE_ALIGNMENT);
 
             /* actually it should be handled when it is reconstructed surface*/
             va_status = gen9_avc_init_check_surfaces(ctx,
@@ -5478,8 +5390,6 @@ gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
             avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
             i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2],avc_priv_surface->dmv_top);
             i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2 + 1],avc_priv_surface->dmv_bottom);
-            dri_bo_reference(avc_priv_surface->dmv_top);
-            dri_bo_reference(avc_priv_surface->dmv_bottom);
             avc_state->top_field_poc[2*i] = avc_priv_surface->top_field_order_cnt;
             avc_state->top_field_poc[2*i+1] = avc_priv_surface->top_field_order_cnt + 1;
             avc_priv_surface->frame_store_id = i;
@@ -5492,15 +5402,13 @@ gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
     /* Encoded bitstream ?*/
     obj_buffer = encode_state->coded_buf_object;
     bo = obj_buffer->buffer_store->bo;
-    i965_free_gpe_resource(&avc_ctx->compressed_bitstream.res);
-    i965_dri_object_to_buffer_gpe_resource(&avc_ctx->compressed_bitstream.res, bo);
-    avc_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
-    avc_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
+    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
+    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
+    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
+    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
 
     /*status buffer */
-    dri_bo_unreference(avc_ctx->status_buffer.bo);
     avc_ctx->status_buffer.bo = bo;
-    dri_bo_reference(bo);
 
     /* set the internal flag to 0 to indicate the coded size is unknown */
     dri_bo_map(bo, 1);
@@ -5766,7 +5674,7 @@ gen9_avc_vme_context_destroy(void * context)
 {
     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
 
@@ -5775,20 +5683,11 @@ gen9_avc_vme_context_destroy(void * context)
 
     gen9_avc_kernel_destroy(vme_context);
 
-    if(generic_ctx)
-        free(generic_ctx);
-
-    if(avc_ctx)
-        free(avc_ctx);
-
-    if(generic_state)
-        free(generic_state);
-
-    if(avc_state)
-        free(avc_state);
-
-    if(vme_context)
-        free(vme_context);
+    free(generic_ctx);
+    free(avc_ctx);
+    free(generic_state);
+    free(avc_state);
+    free(vme_context);
     return;
 
 }
@@ -5798,10 +5697,9 @@ gen9_avc_kernel_init(VADriverContextP ctx,
                      struct intel_encoder_context *encoder_context)
 {
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
 
-
     gen9_avc_kernel_init_scaling(ctx,generic_ctx,&avc_ctx->context_scaling);
     gen9_avc_kernel_init_brc(ctx,generic_ctx,&avc_ctx->context_brc);
     gen9_avc_kernel_init_me(ctx,generic_ctx,&avc_ctx->context_me);
@@ -5828,7 +5726,8 @@ gen9_avc_kernel_init(VADriverContextP ctx,
     generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
     generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
     generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
-}
+} 
+
 
 /*
 PAK pipeline related function
@@ -5842,7 +5741,7 @@ gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                               struct intel_encoder_context *encoder_context)
 {
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
     struct intel_batchbuffer *batch = encoder_context->base.batch;
 
@@ -5853,12 +5752,12 @@ gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                   (0 << 29) |
                   (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                   (MFD_MODE_VLD << 15) |
-                  (0 << 13) |                   /* VDEnc mode  is 1*/
+                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                   ((generic_state->curr_pak_pass != (generic_state->num_pak_passes -1)) << 10) |                   /* Stream-Out Enable */
                   ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                   ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                   (0 << 7)  |                   /* Scaled surface enable */
-                  (0 << 6)  |                   /* Frame statistics stream out enable, always '1' in VDEnc mode */
+                  (0 << 6)  |                   /* Frame statistics stream out enable */
                   (0 << 5)  |                   /* not in stitch mode */
                   (1 << 4)  |                   /* encoding mode */
                   (MFX_FORMAT_AVC << 0));
@@ -5912,8 +5811,10 @@ gen9_mfc_avc_surface_state(VADriverContextP ctx,
 static void
 gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
     struct intel_batchbuffer *batch = encoder_context->base.batch;
     int i;
 
@@ -5922,33 +5823,33 @@ gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_cont
     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
 
     /* the DW1-3 is for pre_deblocking */
-    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, 0);
+    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
 
     /* the DW4-6 is for the post_deblocking */
-    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, 0);
+    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
 
     /* the DW7-9 is for the uncompressed_picture */
-    OUT_BUFFER_3DW(batch, avc_ctx->res_uncompressed_input_surface.bo, 1, 0, 0);
+    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);
 
     /* the DW10-12 is for PAK information (write) */
-    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, 0);//?
+    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?
 
     /* the DW13-15 is for the intra_row_store_scratch */
-    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, 0);
+    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
 
     /* the DW16-18 is for the deblocking filter */
-    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, 0);
+    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
 
     /* the DW 19-50 is for Reference pictures*/
     for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
-        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 1, 0);
+        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
     }
 
     /* DW 51, reference picture attributes */
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
     /* The DW 52-54 is for PAK information (read) */
-    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, 0);
+    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);
 
     /* the DW 55-57 is the ILDB buffer */
     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
@@ -5970,8 +5871,9 @@ gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
     struct intel_batchbuffer *batch = encoder_context->base.batch;
     struct object_surface *obj_surface;
@@ -5989,17 +5891,17 @@ gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
     BEGIN_BCS_BATCH(batch, 26);
 
     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
-    /* The DW1-5 is for the MFX indirect bistream offset, ignore for VDEnc mode */
+    /* The DW1-5 is for the MFX indirect bitstream offset */
     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
     OUT_BUFFER_2DW(batch, NULL, 0, 0);
 
-    /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
+    /* the DW6-10 is for MFX Indirect MV Object Base Address */
     size = w_mb * h_mb * 32 * 4;
     OUT_BUFFER_3DW(batch,
                    avc_priv_surface->res_mv_data_surface.bo,
                    1,
                    0,
-                   0);
+                   i965->intel.mocs_state);
     OUT_BUFFER_2DW(batch,
                    avc_priv_surface->res_mv_data_surface.bo,
                    1,
@@ -6017,14 +5919,14 @@ gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
      * Note: an offset is specified in MFX_AVC_SLICE_STATE
      */
     OUT_BUFFER_3DW(batch,
-                   avc_ctx->compressed_bitstream.res.bo,
+                   generic_ctx->compressed_bitstream.res.bo,
                    1,
                    0,
-                   0);
+                   i965->intel.mocs_state);
     OUT_BUFFER_2DW(batch,
-                   avc_ctx->compressed_bitstream.res.bo,
+                   generic_ctx->compressed_bitstream.res.bo,
                    1,
-                   avc_ctx->compressed_bitstream.end_offset);
+                   generic_ctx->compressed_bitstream.end_offset);
 
     ADVANCE_BCS_BATCH(batch);
 }
@@ -6032,8 +5934,9 @@ gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
 static void
 gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
     struct intel_batchbuffer *batch = encoder_context->base.batch;
 
     BEGIN_BCS_BATCH(batch, 10);
@@ -6041,7 +5944,7 @@ gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_
     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
 
     /* The DW1-3 is for bsd/mpc row store scratch buffer */
-    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, 0);
+    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
 
     /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
@@ -6056,9 +5959,10 @@ static void
 gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                               struct intel_encoder_context *encoder_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct intel_batchbuffer *batch = encoder_context->base.batch;
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
 
     int i;
@@ -6071,25 +5975,23 @@ gen9_mfc_avc_directmode_state(VADriverContextP ctx,
     /* the DW1-32 is for the direct MV for reference */
     for(i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
         if ( avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
-            OUT_BCS_RELOC(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
+            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                           I915_GEM_DOMAIN_INSTRUCTION, 0,
                           0);
-            OUT_BCS_BATCH(batch, 0);
         } else {
             OUT_BCS_BATCH(batch, 0);
             OUT_BCS_BATCH(batch, 0);
         }
     }
 
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
-    /* the DW34-36 is the MV for the current reference */
-    OUT_BCS_RELOC(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
-                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+    /* the DW34-36 is the MV for the current frame */
+    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
 
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
     /* POL list */
     for(i = 0; i < 32; i++) {
@@ -6133,7 +6035,7 @@ gen9_mfc_avc_qm_state(VADriverContextP ctx,
     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
 
-    /* TODO: add support for non flat matrix */
+
     const unsigned int *qm_4x4_intra;
     const unsigned int *qm_4x4_inter;
     const unsigned int *qm_8x8_intra;
@@ -6186,7 +6088,10 @@ gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
     int i, j;
     for (i = 0; i < len; i++)
        for (j = 0; j < len; j++)
+       {
+           assert(qm[j * len + i]);
            fqm[i * len + j] = (1 << 16) / qm[j * len + i];
+       }
 }
 
 static void
@@ -6194,7 +6099,6 @@ gen9_mfc_avc_fqm_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
 {
-    /* TODO: add support for non flat matrix */
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
@@ -6234,10 +6138,9 @@ gen9_mfc_avc_insert_object(VADriverContextP ctx,
                            struct intel_encoder_context *encoder_context,
                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
-                           int slice_header_indicator)
+                           int slice_header_indicator,
+                           struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = encoder_context->base.batch;
-
     if (data_bits_in_last_dw == 0)
        data_bits_in_last_dw = 32;
 
@@ -6252,7 +6155,7 @@ gen9_mfc_avc_insert_object(VADriverContextP ctx,
                   (!!emulation_flag << 3) |
                   ((!!is_last_header) << 2) |
                   ((!!is_end_of_slice) << 1) |
-                  (0 << 0));    /* TODO: check this flag */
+                  (0 << 0));    /* check this flag */
     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
 
     ADVANCE_BCS_BATCH(batch);
@@ -6262,7 +6165,8 @@ static void
 gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                       struct encode_state *encode_state,
                                       struct intel_encoder_context *encoder_context,
-                                      int slice_index)
+                                      int slice_index,
+                                      struct intel_batchbuffer *batch)
 {
     VAEncPackedHeaderParameterBuffer *param = NULL;
     unsigned int length_in_bits;
@@ -6305,7 +6209,8 @@ gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                    0,
                                    0,
                                    !param->has_emulation_bytes,
-                                   0);
+                                   0,
+                                   batch);
     }
 
     if (slice_header_index == -1) {
@@ -6328,7 +6233,8 @@ gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                    slice_header_length_in_bits & 0x1f,
                                    5,  /* first 5 bytes are start code + nal unit type */
                                    1, 0, 1,
-                                   1);
+                                   1,
+                                   batch);
 
         free(slice_header);
     } else {
@@ -6353,7 +6259,8 @@ gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                    1,
                                    0,
                                    !param->has_emulation_bytes,
-                                   1);
+                                   1,
+                                   batch);
     }
 
     return;
@@ -6364,7 +6271,8 @@ gen9_mfc_avc_inset_headers(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context,
                            VAEncSliceParameterBufferH264 *slice_param,
-                           int slice_index)
+                           int slice_index,
+                           struct intel_batchbuffer *batch)
 {
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
@@ -6392,7 +6300,8 @@ gen9_mfc_avc_inset_headers(VADriverContextP ctx,
                                        0,
                                        0,
                                        !param->has_emulation_bytes,
-                                       0);
+                                       0,
+                                       batch);
         }
 
         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
@@ -6417,7 +6326,8 @@ gen9_mfc_avc_inset_headers(VADriverContextP ctx,
                                        0,
                                        0,
                                        !param->has_emulation_bytes,
-                                       0);
+                                       0,
+                                       batch);
         }
 
         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
@@ -6441,16 +6351,18 @@ gen9_mfc_avc_inset_headers(VADriverContextP ctx,
                                        0,
                                        0,
                                        !param->has_emulation_bytes,
-                                       0);
-        } else if (internal_rate_mode == INTEL_BRC_CBR) {
-            /* TODO: insert others */
+                                       0,
+                                       batch);
+        } else if (internal_rate_mode == VA_RC_CBR) {
+            /* insert others */
         }
     }
 
     gen9_mfc_avc_insert_slice_packed_data(ctx,
                                           encode_state,
                                           encoder_context,
-                                          slice_index);
+                                          slice_index,
+                                          batch);
 }
 
 static void
@@ -6459,13 +6371,13 @@ gen9_mfc_avc_slice_state(VADriverContextP ctx,
                          struct intel_encoder_context *encoder_context,
                          VAEncPictureParameterBufferH264 *pic_param,
                          VAEncSliceParameterBufferH264 *slice_param,
-                         VAEncSliceParameterBufferH264 *next_slice_param)
+                         VAEncSliceParameterBufferH264 *next_slice_param,
+                         struct intel_batchbuffer *batch)
 {
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
-    struct intel_batchbuffer *batch = encoder_context->base.batch;
     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
     unsigned char correct[6], grow, shrink;
@@ -6476,6 +6388,8 @@ gen9_mfc_avc_slice_state(VADriverContextP ctx,
     int num_ref_l0 = 0, num_ref_l1 = 0;
     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
     int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+    unsigned int rc_panic_enable = 0;
+    unsigned int rate_control_counter_enable = 0;
     unsigned int rounding_value = 0;
     unsigned int rounding_inter_enable = 0;
 
@@ -6525,11 +6439,11 @@ gen9_mfc_avc_slice_state(VADriverContextP ctx,
     }
 
     slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
-    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_height_in_mbs;
+    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;
 
     if (next_slice_param) {
         next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
-        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_height_in_mbs;
+        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
     } else {
         next_slice_hor_pos = 0;
         next_slice_ver_pos = generic_state->frame_height_in_mbs;
@@ -6565,13 +6479,19 @@ gen9_mfc_avc_slice_state(VADriverContextP ctx,
         }
     }
 
-    max_qp_n = 0;       /* TODO: update it */
-    max_qp_p = 0;       /* TODO: update it */
-    grow = 0;           /* TODO: update it */
-    shrink = 0;         /* TODO: update it */
+    max_qp_n = 0;
+    max_qp_p = 0;
+    grow = 0;
+    shrink = 0;
+
+    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
+    rc_panic_enable = (avc_state->rc_panic_enable &&
+                      (!avc_state->min_max_qp_enable) &&
+                      (encoder_context->rate_control_mode != VA_RC_CQP) &&
+                      (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));
 
     for (i = 0; i < 6; i++)
-        correct[i] = 0; /* TODO: update it */
+        correct[i] = 0;
 
     BEGIN_BCS_BATCH(batch, 11);
 
@@ -6600,11 +6520,11 @@ gen9_mfc_avc_slice_state(VADriverContextP ctx,
                   next_slice_hor_pos);
 
     OUT_BCS_BATCH(batch,
-                  (0 << 31) |           /* TODO: ignore it for VDENC ??? */
+                  (rate_control_counter_enable << 31) |
                   (1 << 30) |           /* ResetRateControlCounter */
                   (2 << 28) |           /* Loose Rate Control */
                   (0 << 24) |           /* RC Stable Tolerance */
-                  (0 << 23) |           /* RC Panic Enable */
+                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                   (1 << 22) |           /* CBP mode */
                   (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                   (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
@@ -6612,11 +6532,11 @@ gen9_mfc_avc_slice_state(VADriverContextP ctx,
                   (0 << 18) |          /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                   (1 << 17) |          /* HeaderPresentFlag */
                   (1 << 16) |          /* SliceData PresentFlag */
-                  (0 << 15) |          /* TailPresentFlag, TODO: check it on VDEnc  */
+                  (0 << 15) |          /* TailPresentFlag  */
                   (1 << 13) |          /* RBSP NAL TYPE */
                   (1 << 12));           /* CabacZeroWordInsertionEnable */
 
-    OUT_BCS_BATCH(batch, avc_ctx->compressed_bitstream.start_offset);
+    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);
 
     OUT_BCS_BATCH(batch,
                   (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
@@ -6659,11 +6579,11 @@ static void
 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context,
-                                 VAEncSliceParameterBufferH264 *slice_param)
+                                 VAEncSliceParameterBufferH264 *slice_param,
+                                 struct intel_batchbuffer *batch)
 {
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
-    struct intel_batchbuffer *batch = encoder_context->base.batch;
     VAPictureH264 *ref_pic;
     int i, slice_type, ref_idx_shift;
     unsigned int fwd_ref_entry;
@@ -6675,7 +6595,7 @@ gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
 
     if ((slice_type == SLICE_TYPE_P) ||
         (slice_type == SLICE_TYPE_B)) {
-          for (i = 0; i < avc_state->num_refs[0]; i++) {
+          for (i = 0; i < MIN(avc_state->num_refs[0],4); i++) {
               ref_pic = &slice_param->RefPicList0[i];
               ref_idx_shift = i * 8;
 
@@ -6686,7 +6606,7 @@ gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
 
     bwd_ref_entry = 0x80808080;
     if (slice_type == SLICE_TYPE_B) {
-        for (i = 0; i < avc_state->num_refs[1]; i++) {
+        for (i = 0; i < MIN(avc_state->num_refs[1],4); i++) {
             ref_pic = &slice_param->RefPicList1[i];
             ref_idx_shift = i * 8;
 
@@ -6726,9 +6646,9 @@ gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context,
                                 VAEncPictureParameterBufferH264 *pic_param,
-                                VAEncSliceParameterBufferH264 *slice_param)
+                                VAEncSliceParameterBufferH264 *slice_param,
+                                struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = encoder_context->base.batch;
     int i, slice_type;
     short weightoffsets[32 * 6];
 
@@ -6798,29 +6718,60 @@ gen9_mfc_avc_single_slice(VADriverContextP ctx,
                           VAEncSliceParameterBufferH264 *next_slice_param,
                           int slice_index)
 {
+    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
-    struct intel_batchbuffer *batch = encoder_context->base.batch;
     struct object_surface *obj_surface;
     struct gen9_surface_avc *avc_priv_surface;
 
-    gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
-    gen9_mfc_avc_weightoffset_state(ctx,
-                                    encode_state,
-                                    encoder_context,
-                                    pic_param,
-                                    slice_param);
-    gen9_mfc_avc_slice_state(ctx,
-                             encode_state,
-                             encoder_context,
-                             pic_param,
-                             slice_param,
-                             next_slice_param);
-    gen9_mfc_avc_inset_headers(ctx,
-                               encode_state,
-                               encoder_context,
-                               slice_param,
-                               slice_index);
+    unsigned int slice_offset = 0;
+
+    if(generic_state->curr_pak_pass == 0)
+    {
+        slice_offset = intel_batchbuffer_used_size(slice_batch);
+        avc_state->slice_batch_offset[slice_index] = slice_offset;
+        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param,slice_batch);
+        gen9_mfc_avc_weightoffset_state(ctx,
+                                        encode_state,
+                                        encoder_context,
+                                        pic_param,
+                                        slice_param,
+                                        slice_batch);
+        gen9_mfc_avc_slice_state(ctx,
+                                 encode_state,
+                                 encoder_context,
+                                 pic_param,
+                                 slice_param,
+                                 next_slice_param,
+                                 slice_batch);
+        gen9_mfc_avc_inset_headers(ctx,
+                                   encode_state,
+                                   encoder_context,
+                                   slice_param,
+                                   slice_index,
+                                   slice_batch);
+
+        BEGIN_BCS_BATCH(slice_batch, 2);
+        OUT_BCS_BATCH(slice_batch, 0);
+        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
+        ADVANCE_BCS_BATCH(slice_batch);
+
+    }else
+    {
+        slice_offset = avc_state->slice_batch_offset[slice_index];
+    }
+    /* insert slice as second level.*/
+    memset(&second_level_batch, 0, sizeof(second_level_batch));
+    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
+    second_level_batch.offset = slice_offset;
+    second_level_batch.bo = slice_batch->buffer;
+    gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
+
     /* insert mb code as second levle.*/
     obj_surface = encode_state->reconstructed_object;
     assert(obj_surface->private_data);
@@ -6844,8 +6795,8 @@ gen9_avc_pak_slice_level(VADriverContextP ctx,
     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
     int i, j;
     int slice_index = 0;
-    int is_frame_level = 1;       /* TODO: check it for SKL,now single slice per frame */
-    int has_tail = 0;             /* TODO: check it later */
+    int is_frame_level = 1;       /* check it for SKL,now single slice per frame */
+    int has_tail = 0;             /* check it later */
 
     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
@@ -6873,7 +6824,7 @@ gen9_avc_pak_slice_level(VADriverContextP ctx,
             if (is_frame_level)
                 break;
             else {
-                /* TODO: remove assert(0) and add other commands here */
+                /* remove assert(0) and add other commands here */
                 assert(0);
             }
         }
@@ -6883,7 +6834,7 @@ gen9_avc_pak_slice_level(VADriverContextP ctx,
     }
 
     if (has_tail) {
-        /* TODO: insert a tail if required */
+        /* insert a tail if required */
     }
 
     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
@@ -6896,7 +6847,8 @@ gen9_avc_pak_picture_level(VADriverContextP ctx,
                            struct intel_encoder_context *encoder_context)
 {
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
     struct intel_batchbuffer *batch = encoder_context->base.batch;
@@ -6908,16 +6860,16 @@ gen9_avc_pak_picture_level(VADriverContextP ctx,
         status_buffer = &(avc_ctx->status_buffer);
 
         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
-        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_ctrl_offset;
+        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
         mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
         mi_conditional_batch_buffer_end_params.compare_data = 0;
-        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 1;
+        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
     }
 
     gen9_mfc_avc_pipe_mode_select(ctx,encode_state,encoder_context);
-    gen9_mfc_avc_surface_state(ctx,encoder_context,&(avc_ctx->res_reconstructed_surface),0);
-    gen9_mfc_avc_surface_state(ctx,encoder_context,&(avc_ctx->res_uncompressed_input_surface),4);
+    gen9_mfc_avc_surface_state(ctx,encoder_context,&(generic_ctx->res_reconstructed_surface),0);
+    gen9_mfc_avc_surface_state(ctx,encoder_context,&(generic_ctx->res_uncompressed_input_surface),4);
     gen9_mfc_avc_pipe_buf_addr_state(ctx,encoder_context);
     gen9_mfc_avc_ind_obj_base_addr_state(ctx,encode_state,encoder_context);
     gen9_mfc_avc_bsp_buf_base_addr_state(ctx,encoder_context);
@@ -6955,7 +6907,7 @@ gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *enc
 {
     struct intel_batchbuffer *batch = encoder_context->base.batch;
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
 
     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
@@ -6971,19 +6923,24 @@ gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *enc
     /* read register and store into status_buffer and pak_statitistic info */
     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
     mi_store_reg_mem_param.bo = status_buffer->bo;
-    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
-    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_frame_reg_offset;
+    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
+    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
 
     mi_store_reg_mem_param.bo = status_buffer->bo;
-    mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
-    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
+    mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
+    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
 
     /*update the status in the pak_statistic_surface */
     mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
     mi_store_reg_mem_param.offset = 0;
-    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_frame_reg_offset;
+    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
+    gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+
+    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
+    mi_store_reg_mem_param.offset = 4;
+    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
 
     memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
@@ -7013,19 +6970,21 @@ gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
 
     switch (rate_control_mode & 0x7f) {
     case VA_RC_CBR:
-        generic_state->internal_rate_mode = INTEL_BRC_CBR;
+        generic_state->internal_rate_mode = VA_RC_CBR;
         break;
 
     case VA_RC_VBR:
-        generic_state->internal_rate_mode = INTEL_BRC_VBR;//AVBR
+        generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
         break;
 
     case VA_RC_CQP:
     default:
-        generic_state->internal_rate_mode = INTEL_BRC_CQP;
+        generic_state->internal_rate_mode = VA_RC_CQP;
         break;
     }
 
+    if (encoder_context->quality_level == 0)\r
+        encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;\r
 }
 
 static VAStatus
@@ -7036,7 +6995,8 @@ gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
     VAStatus va_status;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
 
@@ -7081,6 +7041,7 @@ gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
 
     if (va_status != VA_STATUS_SUCCESS)
         return va_status;
+
     memset(&surface_param,0,sizeof(surface_param));
     surface_param.frame_width = generic_state->frame_width_in_pixel;
     surface_param.frame_height = generic_state->frame_height_in_pixel;
@@ -7098,8 +7059,6 @@ gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-2],avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1],avc_priv_surface->dmv_bottom);
-       dri_bo_reference(avc_priv_surface->dmv_top);
-       dri_bo_reference(avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
@@ -7108,16 +7067,16 @@ gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-1] = avc_priv_surface->top_field_order_cnt + 1;
     }
-    i965_free_gpe_resource(&avc_ctx->res_reconstructed_surface);
+    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
-    i965_object_surface_to_2d_gpe_resource(&avc_ctx->res_reconstructed_surface, obj_surface);
+    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);
 
 
     if (avc_state->enable_avc_ildb) {
-        i965_object_surface_to_2d_gpe_resource(&avc_ctx->res_post_deblocking_output, obj_surface);
+        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface,GPE_RESOURCE_ALIGNMENT);
     } else {
-        i965_object_surface_to_2d_gpe_resource(&avc_ctx->res_pre_deblocking_output, obj_surface);
+        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface,GPE_RESOURCE_ALIGNMENT);
     }
     /* input YUV surface */
     obj_surface = encode_state->input_yuv_object;
@@ -7125,8 +7084,8 @@ gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
 
     if (va_status != VA_STATUS_SUCCESS)
         return va_status;
-    i965_free_gpe_resource(&avc_ctx->res_uncompressed_input_surface);
-    i965_object_surface_to_2d_gpe_resource(&avc_ctx->res_uncompressed_input_surface, obj_surface);
+    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
+    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);
 
     /* Reference surfaces */
     for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
@@ -7138,7 +7097,7 @@ gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
         avc_state->top_field_poc[2*i+1] = 0;
 
         if (obj_surface && obj_surface->bo) {
-            i965_object_surface_to_2d_gpe_resource(&avc_ctx->list_reference_res[i], obj_surface);
+            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface,GPE_RESOURCE_ALIGNMENT);
 
             /* actually it should be handled when it is reconstructed surface */
             va_status = gen9_avc_init_check_surfaces(ctx,
@@ -7149,8 +7108,6 @@ gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
             avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
             i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2],avc_priv_surface->dmv_top);
             i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2 + 1],avc_priv_surface->dmv_bottom);
-            dri_bo_reference(avc_priv_surface->dmv_top);
-            dri_bo_reference(avc_priv_surface->dmv_bottom);
             avc_priv_surface->frame_store_id = i;
             avc_state->top_field_poc[2*i] = avc_priv_surface->top_field_order_cnt;
             avc_state->top_field_poc[2*i+1] = avc_priv_surface->top_field_order_cnt+1;
@@ -7160,6 +7117,24 @@ gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
         }
     }
 
+    if (avc_ctx->pres_slice_batch_buffer_2nd_level)
+    {
+        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
+        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
+    }
+
+    avc_ctx->pres_slice_batch_buffer_2nd_level =
+        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
+                              4096 *
+                              encode_state->num_slice_params_ext);
+    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
+        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+    for (i = 0;i < MAX_AVC_SLICE_NUM;i++) {
+        avc_state->slice_batch_offset[i] = 0;
+    }
+
+
     size = w_mb * 64;
     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
@@ -7211,7 +7186,7 @@ gen9_avc_encode_picture(VADriverContextP ctx,
     VAStatus va_status;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
     struct intel_batchbuffer *batch = encoder_context->base.batch;
 
@@ -7220,16 +7195,16 @@ gen9_avc_encode_picture(VADriverContextP ctx,
     if (va_status != VA_STATUS_SUCCESS)
         return va_status;
 
+    if (i965->intel.has_bsd2)
+        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
+    else
+        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+
     for (generic_state->curr_pak_pass = 0;
          generic_state->curr_pak_pass < generic_state->num_pak_passes;
          generic_state->curr_pak_pass++) {
 
-         if (i965->intel.has_bsd2)
-             intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
-         else
-             intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
-         intel_batchbuffer_emit_mi_flush(batch);
-
          if (generic_state->curr_pak_pass == 0) {
              /* Initialize the avc Image Ctrl reg for the first pass,write 0 to staturs/control register, is it needed in AVC? */
              struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
@@ -7243,14 +7218,19 @@ gen9_avc_encode_picture(VADriverContextP ctx,
          }
          gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
          gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
-         intel_batchbuffer_emit_mi_flush(batch);
-
          gen9_avc_read_mfc_status(ctx, encoder_context);
-         intel_batchbuffer_end_atomic(batch);
-         intel_batchbuffer_flush(batch);
 
     }
 
+    if (avc_ctx->pres_slice_batch_buffer_2nd_level)
+    {
+        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
+        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+
     generic_state->seq_frame_number++;
     generic_state->total_frame_number++;
     generic_state->first_frame = 0;
@@ -7284,7 +7264,8 @@ static void
 gen9_avc_pak_context_destroy(void * context)
 {
     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
-    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )pak_context->private_enc_ctx;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
+    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
 
     int i = 0;
 
@@ -7292,12 +7273,12 @@ gen9_avc_pak_context_destroy(void * context)
         return;
 
     // other things
-    i965_free_gpe_resource(&avc_ctx->res_reconstructed_surface);
+    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
-    i965_free_gpe_resource(&avc_ctx->res_uncompressed_input_surface);
+    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
 
-    i965_free_gpe_resource(&avc_ctx->compressed_bitstream.res);
+    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
@@ -7312,6 +7293,13 @@ gen9_avc_pak_context_destroy(void * context)
     {
         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
     }
+
+    if (avc_ctx->pres_slice_batch_buffer_2nd_level)
+    {
+        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
+        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
+    }
+
 }
 
 static VAStatus
@@ -7325,7 +7313,7 @@ gen9_avc_get_coded_status(VADriverContextP ctx,
         return VA_STATUS_ERROR_INVALID_BUFFER;
 
     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
-    coded_buf_seg->base.size = avc_encode_status->bs_byte_count;
+    coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
 
     return VA_STATUS_SUCCESS;
 }
@@ -7334,9 +7322,10 @@ Bool
 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
 {
     /* VME & PAK share the same context */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct encoder_vme_mfc_context * vme_context = NULL;
     struct generic_encoder_context * generic_ctx = NULL;
-    struct gen9_avc_encoder_context * avc_ctx = NULL;
+    struct i965_avc_encoder_context * avc_ctx = NULL;
     struct generic_enc_codec_state * generic_state = NULL;
     struct avc_enc_state * avc_state = NULL;
     struct encoder_status_buffer_internal *status_buffer;
@@ -7344,7 +7333,7 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
 
     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
-    avc_ctx = calloc(1, sizeof(struct gen9_avc_encoder_context));
+    avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
     avc_state = calloc(1, sizeof(struct avc_enc_state));
 
@@ -7353,7 +7342,7 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
 
     memset(vme_context,0,sizeof(struct encoder_vme_mfc_context));
     memset(generic_ctx,0,sizeof(struct generic_encoder_context));
-    memset(avc_ctx,0,sizeof(struct gen9_avc_encoder_context));
+    memset(avc_ctx,0,sizeof(struct i965_avc_encoder_context));
     memset(generic_state,0,sizeof(struct generic_enc_codec_state));
     memset(avc_state,0,sizeof(struct avc_enc_state));
 
@@ -7363,8 +7352,16 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
     vme_context->generic_enc_state = generic_state;
     vme_context->private_enc_state = avc_state;
 
-    avc_ctx->ctx = ctx;
+    if (IS_SKL(i965->intel.device_info)||
+        IS_BXT(i965->intel.device_info)) {
+        generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
+        generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
+    }
+    else
+        goto allocate_structure_failed;
+
     /* initialize misc ? */
+    avc_ctx->ctx = ctx;
     generic_ctx->use_hw_scoreboard = 1;
     generic_ctx->use_hw_non_stalling_scoreboard = 1;
 
@@ -7433,7 +7430,7 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
     generic_state->brc_init_reset_buf_size_in_bits = 0;
     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
-    generic_state->window_size = 0;//default
+    generic_state->frames_per_window_size = 0;//default
     generic_state->target_percentage = 0;
 
     generic_state->avbr_curracy = 0;
@@ -7479,6 +7476,7 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
     avc_state->enable_avc_ildb = 0;
     avc_state->mbaff_flag = 0;
     avc_state->enable_force_skip = 1;//default
+    avc_state->rc_panic_enable = 1;//default
     avc_state->suppress_recon_enable = 1;//default
 
     avc_state->ref_pic_select_list_supported = 1;
@@ -7560,13 +7558,19 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
     memset(status_buffer, 0,sizeof(struct encoder_status_buffer_internal));
 
     status_buffer->base_offset = base_offset;
-    status_buffer->bs_byte_count_offset = base_offset + offsetof(struct encoder_status, bs_byte_count);
+    status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
+    status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
+    status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
+    status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
 
     status_buffer->status_buffer_size = sizeof(struct encoder_status);
-    status_buffer->bs_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
+    status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
+    status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
+    status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
+    status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
 
     gen9_avc_kernel_init(ctx,encoder_context);
     encoder_context->vme_context = vme_context;
@@ -7577,21 +7581,11 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
 
 allocate_structure_failed:
 
-    if(vme_context)
-        free(vme_context);
-
-    if(generic_ctx)
-        free(generic_ctx);
-
-    if(avc_ctx)
-        free(avc_ctx);
-
-    if(generic_state)
-        free(generic_state);
-
-    if(avc_state)
-        free(avc_state);
-
+    free(vme_context);
+    free(generic_ctx);
+    free(avc_ctx);
+    free(generic_state);
+    free(avc_state);
     return false;
 }