OSDN Git Service

Update CBR algo for H.264 per temporal layer
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
index 5c3f82f..33226a5 100644 (file)
 #include "i965_encoder_utils.h"
 #include "gen6_mfc.h"
 #include "gen6_vme.h"
+#include "gen9_mfc.h"
 #include "intel_media.h"
 
-#define BRC_CLIP(x, min, max)                                   \
-    {                                                           \
-        x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x));  \
-    }
-
-#define BRC_P_B_QP_DIFF 4
-#define BRC_I_P_QP_DIFF 2
-#define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
-
-#define BRC_PWEIGHT 0.6  /* weight if P slice with comparison to I slice */
-#define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */
-
-#define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
-#define BRC_CY 0.1 /* weight for */
-#define BRC_CX_UNDERFLOW 5.
-#define BRC_CX_OVERFLOW -4.
-
-#define BRC_PI_0_5 1.5707963267948966192313216916398
-
 #ifndef HAVE_LOG2F
 #define log2f(x) (logf(x)/(float)M_LN2)
 #endif
@@ -86,25 +68,12 @@ int intel_avc_enc_slice_type_fixup(int slice_type)
 
 static void
 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
-                                        struct gen6_mfc_context *mfc_context)
+                                        struct intel_encoder_context *encoder_context)
 {
-    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
-    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
-    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
-    float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
-    int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
-    int intra_mb_size = inter_mb_size * 5.0;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     int i;
 
-    mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
-    mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
-    mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
-    mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
-    mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
-    mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
-
     for(i = 0 ; i < 3; i++) {
-        mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
         mfc_context->bit_rate_control_context[i].GrowInit = 6;
@@ -119,78 +88,83 @@ intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
     }
-    
-    mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
-    mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
-    mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
-
-    mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
-    mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
-    mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
 }
 
 static void intel_mfc_brc_init(struct encode_state *encode_state,
                                struct intel_encoder_context* encoder_context)
 {
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
-    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
-    VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
-    VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
-    double bitrate = pSequenceParameter->bits_per_second;
-    double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
-    int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
-    int intra_period = pSequenceParameter->intra_period;
-    int ip_period = pSequenceParameter->ip_period;
-    double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
-    double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
-    double bpf;
-
-    if (pSequenceParameter->ip_period) {
-        pnum = (intra_period + ip_period - 1)/ip_period - 1;
-        bnum = intra_period - inum - pnum;
-    }
+    double bitrate, framerate;
+    double qp1_size = 0.1 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
+    double qp51_size = 0.001 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
+    double bpf, factor;
+    int inum = encoder_context->brc.num_iframes_in_gop,
+        pnum = encoder_context->brc.num_pframes_in_gop,
+        bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
+    int intra_period = encoder_context->brc.gop_size;
+    int i;
 
     mfc_context->brc.mode = encoder_context->rate_control_mode;
 
-    mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
-                                                             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
-    mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
-    mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
-
     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
 
-    bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
-
-    mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
+    mfc_context->hrd.buffer_size = encoder_context->brc.hrd_buffer_size;
     mfc_context->hrd.current_buffer_fullness =
-        (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
-        pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
+        (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
+        encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
     mfc_context->hrd.violation_noted = 0;
 
-    if ((bpf > qp51_size) && (bpf < qp1_size)) {
-        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
-    }
-    else if (bpf >= qp1_size)
-        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
-    else if (bpf <= qp51_size)
-        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
+    for (i = 0; i < encoder_context->layer.num_layers; i++) {
+        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
+        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
+        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
+
+        if (i == 0) {
+            bitrate = encoder_context->brc.bits_per_second[0];
+            framerate = (double)encoder_context->brc.framerate_per_100s[0] / 100.0;
+        } else {
+            bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
+            framerate = (double)(encoder_context->brc.framerate_per_100s[i] - encoder_context->brc.framerate_per_100s[i - 1]) / 100.0;
+        }
+
+        if (i == encoder_context->layer.num_layers - 1)
+            factor = 1.0;
+        else
+            factor = (double)encoder_context->brc.framerate_per_100s[i] / encoder_context->brc.framerate_per_100s[i + 1];
+
+        mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period * factor)/framerate) /
+                                                                    (double)(inum + BRC_PWEIGHT * pnum * factor + BRC_BWEIGHT * bnum * factor));
+        mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
+        mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
 
-    mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
-    mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
+        bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
 
-    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
-    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
-    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
+        if ((bpf > qp51_size) && (bpf < qp1_size)) {
+            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
+        }
+        else if (bpf >= qp1_size)
+            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
+        else if (bpf <= qp51_size)
+            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
+
+        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
+        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
+
+        BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], 1, 51);
+        BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], 1, 51);
+        BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], 1, 51);
+    }
 }
 
 int intel_mfc_update_hrd(struct encode_state *encode_state,
-                         struct gen6_mfc_context *mfc_context,
+                         struct intel_encoder_context *encoder_context,
                          int frame_bits)
 {
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     double prev_bf = mfc_context->hrd.current_buffer_fullness;
 
     mfc_context->hrd.current_buffer_fullness -= frame_bits;
@@ -200,7 +174,7 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
         return BRC_UNDERFLOW;
     }
     
-    mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
+    mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame[encoder_context->layer.curr_frame_layer_id];
     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
         if (mfc_context->brc.mode == VA_RC_VBR)
             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
@@ -213,15 +187,15 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
 }
 
 int intel_mfc_brc_postpack(struct encode_state *encode_state,
-                           struct gen6_mfc_context *mfc_context,
+                           struct intel_encoder_context *encoder_context,
                            int frame_bits)
 {
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
-    int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
-    int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
-    int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
+    int curr_frame_layer_id, next_frame_layer_id;
+    int qpi, qpp, qpb;
     int qp; // quantizer of previously encoded slice of current type
     int qpn; // predicted quantizer for next frame of current type in integer format
     double qpf; // predicted quantizer for next frame of current type in float format
@@ -232,15 +206,41 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
      *  y - how far we are from target HRD buffer fullness
      */
     double x, y;
-    double frame_size_alpha;
+    double frame_size_alpha, factor;
+
+    if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
+        curr_frame_layer_id = 0;
+        next_frame_layer_id = 0;
+    } else {
+        curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
+        next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
+    }
+
+    /* check HRD compliance first */
+    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
+
+    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
+        /* nothing */
+    } else {
+        next_frame_layer_id = curr_frame_layer_id;
+    }
+
+    if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0)
+        factor = 1.0;
+    else
+        factor = (double)encoder_context->brc.framerate_per_100s[next_frame_layer_id] / encoder_context->brc.framerate_per_100s[encoder_context->layer.num_layers - 1];
 
-    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
+    qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
+    qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
+    qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
 
-    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
+    qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
+
+    target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
     if (mfc_context->hrd.buffer_capacity < 5)
         frame_size_alpha = 0;
     else
-        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
+        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype] * factor;
     if (frame_size_alpha > 30) frame_size_alpha = 30;
     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
         (double)(frame_size_alpha + 1.);
@@ -269,9 +269,6 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
     /* making sure that with QP predictions we did do not leave QPs range */
     BRC_CLIP(qpn, 1, 51);
 
-    /* checking wthether HRD compliance is still met */
-    sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);
-
     /* calculating QP delta as some function*/
     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
     if (x > 0) {
@@ -296,23 +293,23 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
         /* correcting QPs of slices of other types */
         if (slicetype == SLICE_TYPE_P) {
             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
-                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
+                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
-                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
+                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
         } else if (slicetype == SLICE_TYPE_I) {
             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
-                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
+                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
-                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
+                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
         } else { // SLICE_TYPE_B
             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
-                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
+                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
-                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
+                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
         }
-        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
-        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
-        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
+        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], 1, 51);
+        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], 1, 51);
+        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], 1, 51);
     } else if (sts == BRC_UNDERFLOW) { // underflow
         if (qpn <= qp) qpn = qp + 1;
         if (qpn > 51) {
@@ -327,7 +324,7 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
         }
     }
 
-    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
+    mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
 
     return sts;
 }
@@ -336,15 +333,13 @@ static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context)
 {
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
-    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
     unsigned int rate_control_mode = encoder_context->rate_control_mode;
-    int target_bit_rate = pSequenceParameter->bits_per_second;
+    int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
     
     // current we only support CBR mode.
     if (rate_control_mode == VA_RC_CBR) {
         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
-        mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
-        mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
+        mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
         mfc_context->vui_hrd.i_frame_number = 0;
 
@@ -388,17 +383,20 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
 {
     unsigned int rate_control_mode = encoder_context->rate_control_mode;
-    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+    if (encoder_context->codec != CODEC_H264 &&
+        encoder_context->codec != CODEC_H264_MVC)
+        return;
 
     if (rate_control_mode == VA_RC_CBR) {
         /*Programing bit rate control */
-        if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) {
-            intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
+        if (encoder_context->brc.need_reset) {
+            intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
             intel_mfc_brc_init(encode_state, encoder_context);
         }
 
         /*Programing HRD control */
-        if ( mfc_context->vui_hrd.i_cpb_size_value == 0 )
+        if (encoder_context->brc.need_reset)
             intel_mfc_hrd_context_init(encode_state, encoder_context);    
     }
 }
@@ -411,6 +409,7 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
     unsigned int rate_control_mode = encoder_context->rate_control_mode;
+    unsigned int skip_emul_byte_cnt;
 
     if (encode_state->packed_header_data[idx]) {
         VAEncPackedHeaderParameterBuffer *param = NULL;
@@ -421,12 +420,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
         length_in_bits = param->bit_length;
 
+        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
         mfc_context->insert_object(ctx,
                                    encoder_context,
                                    header_data,
                                    ALIGN(length_in_bits, 32) >> 5,
                                    length_in_bits & 0x1f,
-                                   5,   /* FIXME: check it */
+                                   skip_emul_byte_cnt,
                                    0,
                                    0,
                                    !param->has_emulation_bytes,
@@ -444,12 +444,14 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
         length_in_bits = param->bit_length;
 
+        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
+
         mfc_context->insert_object(ctx,
                                    encoder_context,
                                    header_data,
                                    ALIGN(length_in_bits, 32) >> 5,
                                    length_in_bits & 0x1f,
-                                   5, /* FIXME: check it */
+                                   skip_emul_byte_cnt,
                                    0,
                                    0,
                                    !param->has_emulation_bytes,
@@ -467,12 +469,13 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
         length_in_bits = param->bit_length;
 
+        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
         mfc_context->insert_object(ctx,
                                    encoder_context,
                                    header_data,
                                    ALIGN(length_in_bits, 32) >> 5,
                                    length_in_bits & 0x1f,
-                                   5, /* FIXME: check it */
+                                   skip_emul_byte_cnt,
                                    0,
                                    0,
                                    !param->has_emulation_bytes,
@@ -496,7 +499,7 @@ void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
                                    (unsigned int *)sei_data,
                                    ALIGN(length_in_bits, 32) >> 5,
                                    length_in_bits & 0x1f,
-                                   4,   
+                                   5,
                                    0,   
                                    0,   
                                    1,
@@ -523,9 +526,9 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
 
-    if (IS_GEN6(i965->intel.device_id)) {
-       /* On the SNB it should be fixed to 128 for the DMV buffer */
-       width_in_mbs = 128;
+    if (IS_GEN6(i965->intel.device_info)) {
+        /* On the SNB it should be fixed to 128 for the DMV buffer */
+        width_in_mbs = 128;
     }
 
     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
@@ -552,10 +555,11 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
 
     /* Setup current frame and current direct mv buffer*/
     obj_surface = encode_state->reconstructed_object;
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
 
     if ( obj_surface->private_data == NULL) {
         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
+        assert(gen6_avc_surface);
         gen6_avc_surface->dmv_top = 
             dri_bo_alloc(i965->intel.bufmgr,
                          "Buffer",
@@ -602,6 +606,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
             if ( obj_surface->private_data == NULL) {
                 
                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
+                assert(gen6_avc_surface);
                 gen6_avc_surface->dmv_top = 
                     dri_bo_alloc(i965->intel.bufmgr,
                                  "Buffer",
@@ -628,7 +633,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
             break;
         }
     }
-       
+
     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
 
@@ -700,11 +705,12 @@ int intel_format_lutvalue(int value, int max)
     if (temp1 > temp2)
         ret = max;
     return ret;
-       
+
 }
 
 
 #define                QP_MAX                  52
+#define                VP8_QP_MAX              128
 
 
 static float intel_lambda_qp(int qp)
@@ -718,57 +724,46 @@ static float intel_lambda_qp(int qp)
     return lambdaf;
 }
 
-
-void intel_vme_update_mbmv_cost(VADriverContextP ctx,
-                                struct encode_state *encode_state,
-                                struct intel_encoder_context *encoder_context)
+static
+void intel_h264_calc_mbmvcost_qp(int qp,
+                                 int slice_type,
+                                 uint8_t *vme_state_message)
 {
-    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
-    struct gen6_vme_context *vme_context = encoder_context->vme_context;
-    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
-    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
-    int qp, m_cost, j, mv_count;
-    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
+    int m_cost, j, mv_count;
     float   lambda, m_costf;
 
-    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
-
-    
-    if (encoder_context->rate_control_mode == VA_RC_CQP)
-       qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
-    else
-       qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
-  
-    if (vme_state_message == NULL)
-       return;
     assert(qp <= QP_MAX); 
     lambda = intel_lambda_qp(qp);
+
+    m_cost = lambda;
+    vme_state_message[MODE_CHROMA_INTRA] = 0;
+    vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
+
     if (slice_type == SLICE_TYPE_I) {
-       vme_state_message[MODE_INTRA_16X16] = 0;
-       m_cost = lambda * 4;
-       vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
-       m_cost = lambda * 16; 
-       vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
-       m_cost = lambda * 3;
-       vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
+        vme_state_message[MODE_INTRA_16X16] = 0;
+        m_cost = lambda * 4;
+        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 16; 
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 3;
+        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
     } else {
-       m_cost = 0;
-       vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
-       for (j = 1; j < 3; j++) {
+        m_cost = 0;
+        vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
+        for (j = 1; j < 3; j++) {
             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
             m_cost = (int)m_costf;
             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
-       }
-       mv_count = 3;
-       for (j = 4; j <= 64; j *= 2) {
+        }
+        mv_count = 3;
+        for (j = 4; j <= 64; j *= 2) {
             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
             m_cost = (int)m_costf;
             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
             mv_count++;
-       }
+        }
 
-       if (qp <= 25) {
+        if (qp <= 25) {
             vme_state_message[MODE_INTRA_16X16] = 0x4a;
             vme_state_message[MODE_INTRA_8X8] = 0x4a;
             vme_state_message[MODE_INTRA_4X4] = 0x4a;
@@ -780,17 +775,17 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
             vme_state_message[MODE_INTER_4X4] = 0x4a;
             vme_state_message[MODE_INTER_BWD] = 0x2a;
             return;
-       }
-       m_costf = lambda * 10;
-       vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
-       m_cost = lambda * 14;
-       vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
-       m_cost = lambda * 24; 
-       vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
-       m_costf = lambda * 3.5;
-       m_cost = m_costf;
-       vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
-       if (slice_type == SLICE_TYPE_P) {
+        }
+        m_costf = lambda * 10;
+        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 14;
+        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 24; 
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+        m_costf = lambda * 3.5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
+        if (slice_type == SLICE_TYPE_P) {
             m_costf = lambda * 2.5;
             m_cost = m_costf;
             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
@@ -808,7 +803,7 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
             /* BWD is not used in P-frame */
             vme_state_message[MODE_INTER_BWD] = 0;
-       } else {
+        } else {
             m_costf = lambda * 2.5;
             m_cost = m_costf;
             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
@@ -827,10 +822,121 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
             m_costf = lambda * 1.5;
             m_cost = m_costf;
             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
-       }
+        }
     }
+    return;
+}
+
+/*
+ * Refresh the H.264 mode/MV cost table stored in the VME context.
+ *
+ * The QP is pic_init_qp + slice_qp_delta (taken from the first slice
+ * parameter buffer) in CQP mode; in every other rate-control mode it is read
+ * from brc.qp_prime_y[], indexed by the current frame layer and the fixed-up
+ * slice type.  The table is left untouched when vme_state_message is NULL.
+ */
+void intel_vme_update_mbmv_cost(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncPictureParameterBufferH264 *pic =
+        (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferH264 *first_slice =
+        (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    uint8_t *cost_table = (uint8_t *)(vme_context->vme_state_message);
+    int slice_type = intel_avc_enc_slice_type_fixup(first_slice->slice_type);
+    int qp;
+
+    if (encoder_context->rate_control_mode != VA_RC_CQP)
+        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
+    else
+        qp = pic->pic_init_qp + first_slice->slice_qp_delta;
+
+    /* Nothing to fill in when the VME context carries no cost table. */
+    if (cost_table != NULL)
+        intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
 }
 
+/*
+ * Fill the VME cost entries in vme_context->vme_state_message for a VP8
+ * frame.
+ *
+ * QP selection: quantization_index[0] of the VP8 q-matrix in CQP mode,
+ * otherwise brc.qp_prime_y[] for the current frame layer and slice type
+ * (I for key frames, P for everything else).  The QP is rescaled by
+ * QP_MAX / VP8_QP_MAX before being handed to intel_lambda_qp().
+ * NOTE(review): presumably this maps the VP8 index range onto the range
+ * intel_lambda_qp() expects - confirm against its definition.
+ *
+ * Key frames only get intra mode costs.  Other frames get the MV cost
+ * entries and then either fixed constants (qp < 92) or lambda-scaled
+ * intra/inter mode costs.
+ *
+ * Returns without touching anything when vme_state_message is NULL.
+ */
+void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
+    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
+    int qp, m_cost, j, mv_count;
+    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
+    float   lambda, m_costf;
+
+    /* frame_type == 0 is treated as a key frame. */
+    int is_key_frame = !pic_param->pic_flags.bits.frame_type;
+    int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
+
+    if (vme_state_message == NULL)
+        return;
+    if (encoder_context->rate_control_mode == VA_RC_CQP)
+        qp = q_matrix->quantization_index[0];
+    else
+        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
+
+    lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
+
+    m_cost = lambda;
+    vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
+
+    if (is_key_frame) {
+        vme_state_message[MODE_INTRA_16X16] = 0;
+        m_cost = lambda * 16;
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 3;
+        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
+    } else {
+        /* MV cost entries: slot 0 is free, slots 1-2 use j = 1, 2 ... */
+        m_cost = 0;
+        vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
+        for (j = 1; j < 3; j++) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
+        }
+        /* ... and slots 3 onwards use the powers of two j = 4 .. 64. */
+        mv_count = 3;
+        for (j = 4; j <= 64; j *= 2) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
+            mv_count++;
+        }
+
+        /* Below this QP threshold the mode costs are fixed constants. */
+        if (qp < 92) {
+            vme_state_message[MODE_INTRA_16X16] = 0x4a;
+            vme_state_message[MODE_INTRA_4X4] = 0x4a;
+            vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
+            vme_state_message[MODE_INTER_16X16] = 0x4a;
+            vme_state_message[MODE_INTER_16X8] = 0x4a;
+            vme_state_message[MODE_INTER_8X8] = 0x4a;
+            vme_state_message[MODE_INTER_4X4] = 0x4a;
+            vme_state_message[MODE_INTER_BWD] = 0;
+            return;
+        }
+
+        /*
+         * The original stored lambda * 10 into m_costf and then encoded the
+         * stale m_cost left over from the MV loop; assign m_cost so the
+         * INTRA_16X16 entry really is derived from lambda * 10.
+         */
+        m_cost = lambda * 10;
+        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 24;
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+
+        m_costf = lambda * 3.5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
+
+        m_costf = lambda * 2.5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+        m_costf = lambda * 4;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
+        m_costf = lambda * 1.5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
+        m_costf = lambda * 5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
+        /* BWD is not used in P-frame */
+        vme_state_message[MODE_INTER_BWD] = 0;
+    }
+}
 
 #define                MB_SCOREBOARD_A         (1 << 0)
 #define                MB_SCOREBOARD_B         (1 << 1)
@@ -854,7 +960,7 @@ gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_cont
     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
-       
+
     vme_context->gpe_context.vfe_desc7.dword = 0;
     return;
 }
@@ -867,7 +973,7 @@ static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num
         return -1;
     if (y_index < 0 || y_index >= mb_height)
         return -1;
-       
+
     mb_index = y_index * mb_width + x_index;
     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
         return -1;
@@ -886,6 +992,16 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
     int mb_row;
     int s;
     unsigned int *command_ptr;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int qp,qp_mb,qp_index;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+    if (encoder_context->rate_control_mode == VA_RC_CQP)
+        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+    else
+        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
 
 #define                USE_SCOREBOARD          (1 << 21)
  
@@ -893,103 +1009,117 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
 
     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
-       VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
-       int first_mb = pSliceParameter->macroblock_address;
-       int num_mb = pSliceParameter->num_macroblocks;
-       unsigned int mb_intra_ub, score_dep;
-       int x_outer, y_outer, x_inner, y_inner;
-       int xtemp_outer = 0;
-
-       x_outer = first_mb % mb_width;
-       y_outer = first_mb / mb_width;
-       mb_row = y_outer;
-                                
-       for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
-           x_inner = x_outer;
-           y_inner = y_outer;
-           for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
-               mb_intra_ub = 0;
-               score_dep = 0;
-               if (x_inner != 0) {
-                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
-                   score_dep |= MB_SCOREBOARD_A; 
-               }
-               if (y_inner != mb_row) {
-                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
-                   score_dep |= MB_SCOREBOARD_B;
-                   if (x_inner != 0)
-                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
-                   if (x_inner != (mb_width -1)) {
-                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
-                       score_dep |= MB_SCOREBOARD_C;
+        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+        int first_mb = pSliceParameter->macroblock_address;
+        int num_mb = pSliceParameter->num_macroblocks;
+        unsigned int mb_intra_ub, score_dep;
+        int x_outer, y_outer, x_inner, y_inner;
+        int xtemp_outer = 0;
+
+        x_outer = first_mb % mb_width;
+        y_outer = first_mb / mb_width;
+        mb_row = y_outer;
+
+        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
+            x_inner = x_outer;
+            y_inner = y_outer;
+            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
+                mb_intra_ub = 0;
+                score_dep = 0;
+                if (x_inner != 0) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+                    score_dep |= MB_SCOREBOARD_A; 
+                }
+                if (y_inner != mb_row) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                    score_dep |= MB_SCOREBOARD_B;
+                    if (x_inner != 0)
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+                    if (x_inner != (mb_width -1)) {
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                        score_dep |= MB_SCOREBOARD_C;
                     }
-               }
-                                                       
-               *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
-               *command_ptr++ = kernel;
-               *command_ptr++ = USE_SCOREBOARD;
-               /* Indirect data */
-               *command_ptr++ = 0;
-               /* the (X, Y) term of scoreboard */
-               *command_ptr++ = ((y_inner << 16) | x_inner);
-               *command_ptr++ = score_dep;
-               /*inline data */
-               *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
-               *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
-               x_inner -= 2;
-               y_inner += 1;
-           }
-           x_outer += 1;
-       }
+                }
+
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
+                *command_ptr++ = kernel;
+                *command_ptr++ = USE_SCOREBOARD;
+                /* Indirect data */
+                *command_ptr++ = 0;
+                /* the (X, Y) term of scoreboard */
+                *command_ptr++ = ((y_inner << 16) | x_inner);
+                *command_ptr++ = score_dep;
+                /*inline data */
+                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+                /* QP occupies one byte */
+                if (vme_context->roi_enabled) {
+                    qp_index = y_inner * mb_width + x_inner;
+                    qp_mb = *(vme_context->qp_per_mb + qp_index);
+                } else
+                    qp_mb = qp;
+                *command_ptr++ = qp_mb;
+                x_inner -= 2;
+                y_inner += 1;
+            }
+            x_outer += 1;
+        }
 
-       xtemp_outer = mb_width - 2;
-       if (xtemp_outer < 0)
+        xtemp_outer = mb_width - 2;
+        if (xtemp_outer < 0)
             xtemp_outer = 0;
-       x_outer = xtemp_outer;
-       y_outer = first_mb / mb_width;
-       for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
-           y_inner = y_outer;
-           x_inner = x_outer;
-           for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
-               mb_intra_ub = 0;
-               score_dep = 0;
-               if (x_inner != 0) {
-                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
-                   score_dep |= MB_SCOREBOARD_A; 
-               }
-               if (y_inner != mb_row) {
-                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
-                   score_dep |= MB_SCOREBOARD_B;
-                   if (x_inner != 0)
-                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
-
-                   if (x_inner != (mb_width -1)) {
-                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
-                       score_dep |= MB_SCOREBOARD_C;
+        x_outer = xtemp_outer;
+        y_outer = first_mb / mb_width;
+        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
+            y_inner = y_outer;
+            x_inner = x_outer;
+            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
+                mb_intra_ub = 0;
+                score_dep = 0;
+                if (x_inner != 0) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+                    score_dep |= MB_SCOREBOARD_A; 
+                }
+                if (y_inner != mb_row) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                    score_dep |= MB_SCOREBOARD_B;
+                    if (x_inner != 0)
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+                    if (x_inner != (mb_width -1)) {
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                        score_dep |= MB_SCOREBOARD_C;
                     }
-               }
-
-               *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
-               *command_ptr++ = kernel;
-               *command_ptr++ = USE_SCOREBOARD;
-               /* Indirect data */
-               *command_ptr++ = 0;
-               /* the (X, Y) term of scoreboard */
-               *command_ptr++ = ((y_inner << 16) | x_inner);
-               *command_ptr++ = score_dep;
-               /*inline data */
-               *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
-               *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
-
-               x_inner -= 2;
-               y_inner += 1;
-           }
-           x_outer++;
-           if (x_outer >= mb_width) {
-               y_outer += 1;
-               x_outer = xtemp_outer;
-           }           
-       }
+                }
+
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
+                *command_ptr++ = kernel;
+                *command_ptr++ = USE_SCOREBOARD;
+                /* Indirect data */
+                *command_ptr++ = 0;
+                /* the (X, Y) term of scoreboard */
+                *command_ptr++ = ((y_inner << 16) | x_inner);
+                *command_ptr++ = score_dep;
+                /*inline data */
+                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+                /* qp occupies one byte */
+                if (vme_context->roi_enabled) {
+                    qp_index = y_inner * mb_width + x_inner;
+                    qp_mb = *(vme_context->qp_per_mb + qp_index);
+                } else
+                    qp_mb = qp;
+                *command_ptr++ = qp_mb;
+
+                x_inner -= 2;
+                y_inner += 1;
+            }
+            x_outer++;
+            if (x_outer >= mb_width) {
+                y_outer += 1;
+                x_outer = xtemp_outer;
+            }          
+        }
     }
 
     *command_ptr++ = 0;
@@ -1032,6 +1162,13 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
 
     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
+        int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
+
+        if (ref_idx_l0 > 3) {
+            WARN_ONCE("ref_idx_l0 is out of range\n");
+            ref_idx_l0 = 0;
+        }
+
         obj_surface = vme_context->used_reference_objects[0];
         frame_index = -1;
         for (i = 0; i < 16; i++) {
@@ -1044,13 +1181,20 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
         if (frame_index == -1) {
             WARN_ONCE("RefPicList0 is not found in DPB!\n");
         } else {
-            /* This is passed by the hacked mode */
-            fref_entry &= ~(0xFF);
-            fref_entry += intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index);
+            int ref_idx_l0_shift = ref_idx_l0 * 8;
+            fref_entry &= ~(0xFF << ref_idx_l0_shift);
+            fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
         }
     }
 
     if (slice_type == SLICE_TYPE_B) {
+        int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
+
+        if (ref_idx_l1 > 3) {
+            WARN_ONCE("ref_idx_l1 is out of range\n");
+            ref_idx_l1 = 0;
+        }
+
         obj_surface = vme_context->used_reference_objects[1];
         frame_index = -1;
         for (i = 0; i < 16; i++) {
@@ -1063,8 +1207,9 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
         if (frame_index == -1) {
             WARN_ONCE("RefPicList1 is not found in DPB!\n");
         } else {
-            bref_entry &= ~(0xFF);
-            bref_entry += intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index);
+            int ref_idx_l1_shift = ref_idx_l1 * 8;
+            bref_entry &= ~(0xFF << ref_idx_l1_shift);
+            bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
         }
     }
 
@@ -1148,7 +1293,7 @@ void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
          */
         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
-                       
+
         vme_state_message[MODE_INTER_16X8] = 0;
         vme_state_message[MODE_INTER_8X8] = 0;
         vme_state_message[MODE_INTER_8X4] = 0;
@@ -1178,105 +1323,105 @@ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
 
     {
-       unsigned int mb_intra_ub, score_dep;
-       int x_outer, y_outer, x_inner, y_inner;
-       int xtemp_outer = 0;
-       int first_mb = 0;
-       int num_mb = mb_width * mb_height;
-
-       x_outer = 0;
-       y_outer = 0;
-       
-                                
-       for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
-           x_inner = x_outer;
-           y_inner = y_outer;
-           for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
-               mb_intra_ub = 0;
-               score_dep = 0;
-               if (x_inner != 0) {
-                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
-                   score_dep |= MB_SCOREBOARD_A; 
-               }
-               if (y_inner != 0) {
-                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
-                   score_dep |= MB_SCOREBOARD_B;
-
-                   if (x_inner != 0)
-                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
-
-                   if (x_inner != (mb_width -1)) {
-                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
-                       score_dep |= MB_SCOREBOARD_C;
-                   }
-               }
-                                                       
-               *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
-               *command_ptr++ = kernel;
-               *command_ptr++ = MPEG2_SCOREBOARD;
-               /* Indirect data */
-               *command_ptr++ = 0;
-               /* the (X, Y) term of scoreboard */
-               *command_ptr++ = ((y_inner << 16) | x_inner);
-               *command_ptr++ = score_dep;
-               /*inline data */
-               *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
-               *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
-               x_inner -= 2;
-               y_inner += 1;
-           }
-           x_outer += 1;
-       }
+        unsigned int mb_intra_ub, score_dep;
+        int x_outer, y_outer, x_inner, y_inner;
+        int xtemp_outer = 0;
+        int first_mb = 0;
+        int num_mb = mb_width * mb_height;
+
+        x_outer = 0;
+        y_outer = 0;
+
+
+        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
+            x_inner = x_outer;
+            y_inner = y_outer;
+            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
+                mb_intra_ub = 0;
+                score_dep = 0;
+                if (x_inner != 0) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+                    score_dep |= MB_SCOREBOARD_A; 
+                }
+                if (y_inner != 0) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                    score_dep |= MB_SCOREBOARD_B;
+
+                    if (x_inner != 0)
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+                    if (x_inner != (mb_width -1)) {
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                        score_dep |= MB_SCOREBOARD_C;
+                    }
+                }
+
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+                *command_ptr++ = kernel;
+                *command_ptr++ = MPEG2_SCOREBOARD;
+                /* Indirect data */
+                *command_ptr++ = 0;
+                /* the (X, Y) term of scoreboard */
+                *command_ptr++ = ((y_inner << 16) | x_inner);
+                *command_ptr++ = score_dep;
+                /*inline data */
+                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
+                x_inner -= 2;
+                y_inner += 1;
+            }
+            x_outer += 1;
+        }
 
-       xtemp_outer = mb_width - 2;
-       if (xtemp_outer < 0)
+        xtemp_outer = mb_width - 2;
+        if (xtemp_outer < 0)
             xtemp_outer = 0;
-       x_outer = xtemp_outer;
-       y_outer = 0;
-       for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
-           y_inner = y_outer;
-           x_inner = x_outer;
-           for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
-               mb_intra_ub = 0;
-               score_dep = 0;
-               if (x_inner != 0) {
-                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
-                   score_dep |= MB_SCOREBOARD_A; 
-               }
-               if (y_inner != 0) {
-                   mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
-                   score_dep |= MB_SCOREBOARD_B;
-
-                   if (x_inner != 0)
-                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
-
-                   if (x_inner != (mb_width -1)) {
-                       mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
-                       score_dep |= MB_SCOREBOARD_C;
-                   }
-               }
-
-               *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
-               *command_ptr++ = kernel;
-               *command_ptr++ = MPEG2_SCOREBOARD;
-               /* Indirect data */
-               *command_ptr++ = 0;
-               /* the (X, Y) term of scoreboard */
-               *command_ptr++ = ((y_inner << 16) | x_inner);
-               *command_ptr++ = score_dep;
-               /*inline data */
-               *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
-               *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
-
-               x_inner -= 2;
-               y_inner += 1;
-           }
-           x_outer++;
-           if (x_outer >= mb_width) {
-               y_outer += 1;
-               x_outer = xtemp_outer;
-           }           
-       }
+        x_outer = xtemp_outer;
+        y_outer = 0;
+        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
+            y_inner = y_outer;
+            x_inner = x_outer;
+            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
+                mb_intra_ub = 0;
+                score_dep = 0;
+                if (x_inner != 0) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+                    score_dep |= MB_SCOREBOARD_A; 
+                }
+                if (y_inner != 0) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                    score_dep |= MB_SCOREBOARD_B;
+
+                    if (x_inner != 0)
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+
+                    if (x_inner != (mb_width -1)) {
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                        score_dep |= MB_SCOREBOARD_C;
+                    }
+                }
+
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+                *command_ptr++ = kernel;
+                *command_ptr++ = MPEG2_SCOREBOARD;
+                /* Indirect data */
+                *command_ptr++ = 0;
+                /* the (X, Y) term of scoreboard */
+                *command_ptr++ = ((y_inner << 16) | x_inner);
+                *command_ptr++ = score_dep;
+                /*inline data */
+                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
+
+                x_inner -= 2;
+                y_inner += 1;
+            }
+            x_outer++;
+            if (x_outer >= mb_width) {
+                y_outer += 1;
+                x_outer = xtemp_outer;
+            }          
+        }
     }
 
     *command_ptr++ = 0;
@@ -1336,6 +1481,7 @@ intel_avc_vme_reference_state(VADriverContextP ctx,
     int max_num_references;
     VAPictureH264 *curr_pic;
     VAPictureH264 *ref_list;
+    int ref_idx;
 
     if (list_index == 0) {
         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
@@ -1362,9 +1508,9 @@ intel_avc_vme_reference_state(VADriverContextP ctx,
             obj_surface = encode_state->reference_objects[list_index];
             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
         }
-    } else {
-        int ref_idx;
 
+        ref_idx = 0;
+    } else {
         curr_pic = &pic_param->CurrPic;
 
         /* select the reference frame in temporal space */
@@ -1380,10 +1526,730 @@ intel_avc_vme_reference_state(VADriverContextP ctx,
 
     if (obj_surface &&
         obj_surface->bo) {
+        assert(ref_idx >= 0);
         vme_context->used_reference_objects[list_index] = obj_surface;
         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
+        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
+                                                    ref_idx << 16 |
+                                                    ref_idx <<  8 |
+                                                    ref_idx);
     } else {
         vme_context->used_reference_objects[list_index] = NULL;
         vme_context->used_references[list_index] = NULL;
+        vme_context->ref_index_in_mb[list_index] = 0;
+    }
+}
+
+/*
+ * Emit every packed header attached to slice `slice_index` into slice_batch,
+ * finishing with the slice header itself.
+ *
+ * Raw packed data of the slice is inserted first (packed slice headers found
+ * in that range are skipped), each with the "last header" flag cleared.  The
+ * slice header is inserted last with the flag set: either the packed one
+ * supplied by the application, or one built by build_avc_slice_header() when
+ * slice_header_index[slice_index] is zero.
+ */
+void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
+                                        struct encode_state *encode_state,
+                                        struct intel_encoder_context *encoder_context,
+                                        int slice_index,
+                                        struct intel_batchbuffer *slice_batch)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncPackedHeaderParameterBuffer *hdr_param = NULL;
+    unsigned int *hdr_payload = NULL;
+    unsigned int bit_count;
+    unsigned int emul_skip;
+    int packed_slice_hdr = -1;
+    int raw_count, raw_base, n;
+
+    /* A zero entry means the application did not pack a slice header. */
+    if (encode_state->slice_header_index[slice_index] != 0)
+        packed_slice_hdr = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
+
+    raw_count = encode_state->slice_rawdata_count[slice_index];
+    raw_base = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
+
+    for (n = 0; n < raw_count; n++) {
+        hdr_payload = (unsigned int *)encode_state->packed_header_data_ext[raw_base + n]->buffer;
+        hdr_param = (VAEncPackedHeaderParameterBuffer *)
+                        (encode_state->packed_header_params_ext[raw_base + n]->buffer);
+
+        /* the packed slice header is always inserted last, so skip it here */
+        if (param_is_slice_header(hdr_param))
+            continue;
+
+        bit_count = hdr_param->bit_length;
+        emul_skip = intel_avc_find_skipemulcnt((unsigned char *)hdr_payload, bit_count);
+
+        /* the slice header still follows, hence last-header flag = 0 */
+        mfc_context->insert_object(ctx,
+                                   encoder_context,
+                                   hdr_payload,
+                                   ALIGN(bit_count, 32) >> 5,
+                                   bit_count & 0x1f,
+                                   emul_skip,
+                                   0,
+                                   0,
+                                   !hdr_param->has_emulation_bytes,
+                                   slice_batch);
+    }
+
+    if (packed_slice_hdr != -1) {
+        /* Application-provided slice header: last header data of the slice. */
+        hdr_payload = (unsigned int *)encode_state->packed_header_data_ext[packed_slice_hdr]->buffer;
+        hdr_param = (VAEncPackedHeaderParameterBuffer *)
+                        (encode_state->packed_header_params_ext[packed_slice_hdr]->buffer);
+        bit_count = hdr_param->bit_length;
+        emul_skip = intel_avc_find_skipemulcnt((unsigned char *)hdr_payload, bit_count);
+
+        mfc_context->insert_object(ctx,
+                                   encoder_context,
+                                   hdr_payload,
+                                   ALIGN(bit_count, 32) >> 5,
+                                   bit_count & 0x1f,
+                                   emul_skip,
+                                   1,
+                                   0,
+                                   !hdr_param->has_emulation_bytes,
+                                   slice_batch);
+    } else {
+        /* No slice header was passed in: the driver generates a normal H.264 one. */
+        VAEncSequenceParameterBufferH264 *seq = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+        VAEncPictureParameterBufferH264 *pic = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+        VAEncSliceParameterBufferH264 *slice = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
+        unsigned char *generated = NULL;
+        int generated_bits = 0;
+
+        generated_bits = build_avc_slice_header(seq, pic, slice, &generated);
+        mfc_context->insert_object(ctx, encoder_context,
+                                   (unsigned int *)generated,
+                                   ALIGN(generated_bits, 32) >> 5,
+                                   generated_bits & 0x1f,
+                                   5,  /* first 5 bytes are start code + nal unit type */
+                                   1, 0, 1, slice_batch);
+
+        free(generated);
+    }
+}
+
+/*
+ * Lazily build the per-QP mode/MV cost table for the slice type of the first
+ * slice.  One buffer object of QP_MAX * 32 bytes is kept per slice type
+ * (I, P, everything else) in the VME context; if the matching table already
+ * exists nothing is done.
+ */
+void
+intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+    dri_bo **table_slot;
+    dri_bo *table_bo;
+    uint8_t *table_base;
+    int qp;
+
+    /* pick the cache slot that belongs to this slice type */
+    if (slice_type == SLICE_TYPE_I)
+        table_slot = &vme_context->i_qp_cost_table;
+    else if (slice_type == SLICE_TYPE_P)
+        table_slot = &vme_context->p_qp_cost_table;
+    else
+        table_slot = &vme_context->b_qp_cost_table;
+
+    if (*table_slot)
+        return;
+
+    /* It is enough to allocate 32 bytes for each qp. */
+    table_bo = dri_bo_alloc(i965->intel.bufmgr,
+                            "cost_table ",
+                            QP_MAX * 32,
+                            64);
+
+    dri_bo_map(table_bo, 1);
+    assert(table_bo->virtual);
+    table_base = (uint8_t *)(table_bo->virtual);
+
+    /* one 32-byte record per qp value */
+    for (qp = 0; qp < QP_MAX; qp++)
+        intel_h264_calc_mbmvcost_qp(qp, slice_type, table_base + qp * 32);
+
+    dri_bo_unmap(table_bo);
+
+    *table_slot = table_bo;
+    vme_context->cost_table_size = QP_MAX * 32;
+}
+
+/*
+ * Bind the cost table matching the first slice's type as a buffer surface
+ * (QP_MAX blocks of 32 bytes, pitch 16) at the given binding-table and
+ * surface-state offsets.
+ */
+extern void
+intel_h264_setup_cost_surface(VADriverContextP ctx,
+                              struct encode_state *encode_state,
+                              struct intel_encoder_context *encoder_context,
+                              unsigned long binding_table_offset,
+                              unsigned long surface_state_offset)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+    struct i965_buffer_surface cost_table;
+
+    /* I and P have dedicated tables; every other slice type uses the B table */
+    cost_table.bo = (slice_type == SLICE_TYPE_I) ? vme_context->i_qp_cost_table :
+                    (slice_type == SLICE_TYPE_P) ? vme_context->p_qp_cost_table :
+                                                   vme_context->b_qp_cost_table;
+    cost_table.num_blocks = QP_MAX;
+    cost_table.pitch = 16;
+    cost_table.size_block = 32;
+
+    vme_context->vme_buffer_suface_setup(ctx,
+                                         &vme_context->gpe_context,
+                                         &cost_table,
+                                         binding_table_offset,
+                                         surface_state_offset);
+}
+
+/*
+ * Convert a QP into a linear quantizer step.
+ *
+ * The mapping follows the scaling of luma transform coefficients in the
+ * H264 spec, 2^(QPy / 6 - 6); the result is multiplied by 16 to avoid very
+ * small steps, which gives 2^(qp / 6 - 2).
+ */
+static float intel_h264_qp_qstep(int qp)
+{
+    const float exponent = (float)qp / 6 - 2;
+
+    return powf(2, exponent);
+}
+
+/*
+ * Inverse of intel_h264_qp_qstep(): qp = 12 + 6 * log2(qstep), rounded down
+ * to an integer.
+ */
+static int intel_h264_qstep_qp(float qstep)
+{
+    const float qp_real = 12.0f + 6.0f * log2f(qstep);
+
+    return floorf(qp_real);
+}
+
+/*
+ * Currently it is based on the following assumption:
+ * SUM(roi_area * 1 / roi_qstep) + non_area * 1 / nonroi_qstep =
+ *                                total_area * 1 / baseqp_qstep
+ *
+ * qstep is the linearized quantizer of H264 quantizer
+ *
+ * ROIRegionParam caches, per ROI, the rectangle converted to macroblock
+ * units together with the QP chosen for that region.
+ */
+typedef struct {
+    /* first macroblock row of the region (inclusive) */
+    int row_start_in_mb;
+    /* macroblock row just past the region (exclusive) */
+    int row_end_in_mb;
+    /* first macroblock column of the region (inclusive) */
+    int col_start_in_mb;
+    /* macroblock column just past the region (exclusive) */
+    int col_end_in_mb;
+
+    /* region size in macroblocks: col_end - col_start and row_end - row_start */
+    int width_mbs;
+    int height_mbs;
+
+    /* QP applied to every macroblock of the region */
+    int roi_qp;
+} ROIRegionParam;
+
+/*
+ * Fill vme_context->qp_per_mb for CBR encoding with ROI.
+ *
+ * Each ROI gets base_qp + roi_value (clipped to [1, 51]).  The QP of the
+ * remaining area is derived from the bit-budget model
+ *   SUM(roi_area / roi_qstep) + nonroi_area / nonroi_qstep = total_area / base_qstep
+ * so that the ROI adjustment is compensated outside the ROIs.
+ *
+ * ROI rectangles are clamped to the picture (in macroblock units) before
+ * they are used, so that the per-row fills never leave the qp_per_mb buffer,
+ * which holds picture_width_in_mbs * picture_height_in_mbs entries.
+ *
+ * base_qp:       frame QP chosen by the rate control.
+ * pMiscParamROI: ROI description from the application; may be NULL, in
+ *                which case the whole picture is filled with one QP.
+ *
+ * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER (through
+ * ASSERT_RET) when the ROI values are not QP deltas.
+ */
+static VAStatus
+intel_h264_enc_roi_cbr(VADriverContextP ctx,
+                       int base_qp,
+                       VAEncMiscParameterBufferROI *pMiscParamROI,
+                       struct encode_state *encode_state,
+                       struct intel_encoder_context *encoder_context)
+{
+    int nonroi_qp;
+    VAEncROI *region_roi;
+    bool quickfill = 0;
+
+    ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
+    int num_roi = 0;
+    int i,j;
+
+    float temp;
+    float qstep_nonroi, qstep_base;
+    float roi_area, total_area, nonroi_area;
+    float sum_roi;
+
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+    int mbs_in_picture = width_in_mbs * height_in_mbs;
+
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAStatus vaStatus = VA_STATUS_SUCCESS;
+
+    if(pMiscParamROI != NULL)
+    {
+        num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
+
+        /* currently roi_value_is_qp_delta is the only supported mode of priority.
+        *
+        * qp_delta set by user is added to base_qp, which is then clamped by
+        * [base_qp-min_delta, base_qp+max_delta].
+        */
+        ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
+    }
+
+    /* when the base_qp is lower than 12, the quality is quite good based
+     * on the H264 test experience.
+     * In such case it is unnecessary to adjust the quality for ROI region.
+     */
+    if (base_qp <= 12) {
+        nonroi_qp = base_qp;
+        quickfill = 1;
+        goto qp_fill;
+    }
+
+    sum_roi = 0.0f;
+    roi_area = 0;
+    for (i = 0; i < num_roi; i++) {
+        int row_start, row_end, col_start, col_end;
+        int roi_width_mbs, roi_height_mbs;
+        int mbs_in_roi;
+        int roi_qp;
+        float qstep_roi;
+
+        region_roi =  (VAEncROI *)pMiscParamROI->roi + i;
+
+        col_start = region_roi->roi_rectangle.x;
+        col_end = col_start + region_roi->roi_rectangle.width;
+        row_start = region_roi->roi_rectangle.y;
+        row_end = row_start + region_roi->roi_rectangle.height;
+        col_start = col_start / 16;
+        col_end = (col_end + 15) / 16;
+        row_start = row_start / 16;
+        row_end = (row_end + 15) / 16;
+
+        /* The rectangle comes from the application: keep it inside the
+         * picture, otherwise the row fills below would write outside
+         * qp_per_mb.  A rectangle fully outside becomes an empty region.
+         */
+        BRC_CLIP(col_start, 0, width_in_mbs);
+        BRC_CLIP(col_end, col_start, width_in_mbs);
+        BRC_CLIP(row_start, 0, height_in_mbs);
+        BRC_CLIP(row_end, row_start, height_in_mbs);
+
+        roi_width_mbs = col_end - col_start;
+        roi_height_mbs = row_end - row_start;
+        mbs_in_roi = roi_width_mbs * roi_height_mbs;
+
+        param_regions[i].row_start_in_mb = row_start;
+        param_regions[i].row_end_in_mb = row_end;
+        param_regions[i].col_start_in_mb = col_start;
+        param_regions[i].col_end_in_mb = col_end;
+        param_regions[i].width_mbs = roi_width_mbs;
+        param_regions[i].height_mbs = roi_height_mbs;
+
+        roi_qp = base_qp + region_roi->roi_value;
+        BRC_CLIP(roi_qp, 1, 51);
+
+        param_regions[i].roi_qp = roi_qp;
+        qstep_roi = intel_h264_qp_qstep(roi_qp);
+
+        roi_area += mbs_in_roi;
+        sum_roi += mbs_in_roi / qstep_roi;
+    }
+
+    total_area = mbs_in_picture;
+    nonroi_area = total_area - roi_area;
+
+    qstep_base = intel_h264_qp_qstep(base_qp);
+    temp = (total_area / qstep_base - sum_roi);
+
+    if (temp <= 0) {
+        /* The ROIs already consume the whole budget: use the coarsest QP.
+         * temp == 0 is included because dividing by it yields infinity,
+         * which cannot be converted to an integer QP.
+         */
+        nonroi_qp = 51;
+    } else if (nonroi_area <= 0) {
+        /* Nothing is left outside the ROIs (or overlapping ROIs were counted
+         * more than once); a non-positive qstep has no logarithm, so fall
+         * back to the base QP.
+         */
+        nonroi_qp = base_qp;
+    } else {
+        qstep_nonroi = nonroi_area / temp;
+        nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
+    }
+
+    BRC_CLIP(nonroi_qp, 1, 51);
+
+qp_fill:
+    memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
+    if (!quickfill) {
+        char *qp_ptr;
+
+        /* overwrite the ROI rows; later regions win where ROIs overlap */
+        for (i = 0; i < num_roi; i++) {
+            for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
+                qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
+                memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
+            }
+        }
+    }
+    return vaStatus;
+}
+
+/*
+ * Decide whether ROI encoding is used for the current frame and, if so,
+ * fill vme_context->qp_per_mb with one QP per macroblock.
+ *
+ * ROI is disabled when the context has no ROI support, when the frame has
+ * more than one slice, when no ROI is supplied, when the rate control mode
+ * is neither CBR nor CQP, or when the per-macroblock buffer cannot be
+ * allocated.  The buffer is (re)allocated whenever the picture size in
+ * macroblocks changes.
+ *
+ * In CQP mode every ROI rectangle is clamped to the picture before its rows
+ * are filled, so the fills stay inside the buffer.  ROIs are applied from
+ * the last to the first, so lower-indexed ROIs win where they overlap.
+ */
+extern void
+intel_h264_enc_roi_config(VADriverContextP ctx,
+                          struct encode_state *encode_state,
+                          struct intel_encoder_context *encoder_context)
+{
+    char *qp_ptr;
+    int i, j;
+    VAEncROI *region_roi;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAEncMiscParameterBuffer* pMiscParamROI;
+    VAEncMiscParameterBufferROI *pParamROI = NULL;
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+
+    int row_start, row_end, col_start, col_end;
+    int num_roi = 0;
+
+    vme_context->roi_enabled = 0;
+    /* Restriction: Disable ROI when multi-slice is enabled */
+    if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
+        return;
+
+    if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
+        pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
+        pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
+
+        /* check whether number of ROI is correct */
+        num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
+    }
+
+    if (num_roi > 0)
+        vme_context->roi_enabled = 1;
+
+    if (!vme_context->roi_enabled)
+        return;
+
+    if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
+        (vme_context->saved_height_mbs != height_in_mbs)) {
+        free(vme_context->qp_per_mb);
+        vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
+
+        if (!vme_context->qp_per_mb) {
+            /* Without the buffer ROI cannot be applied.  Forget the saved
+             * size so that the allocation is retried on the next frame
+             * instead of using a NULL buffer.
+             */
+            vme_context->saved_width_mbs = 0;
+            vme_context->saved_height_mbs = 0;
+            vme_context->roi_enabled = 0;
+            return;
+        }
+
+        vme_context->saved_width_mbs = width_in_mbs;
+        vme_context->saved_height_mbs = height_in_mbs;
+    }
+    if (encoder_context->rate_control_mode == VA_RC_CBR) {
+        /*
+         * TODO: More complex Qp adjust needs to be added.
+         * Currently it is initialized to slice_qp.
+         */
+        VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+        int qp;
+        int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
+        intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
+
+    } else if (encoder_context->rate_control_mode == VA_RC_CQP){
+        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+        VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+        int qp;
+
+        /* the area outside every ROI keeps the slice QP */
+        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+        memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
+
+
+        for (j = num_roi; j ; j--) {
+            int qp_delta, qp_clip;
+
+            region_roi =  (VAEncROI *)pParamROI->roi + j - 1;
+
+            col_start = region_roi->roi_rectangle.x;
+            col_end = col_start + region_roi->roi_rectangle.width;
+            row_start = region_roi->roi_rectangle.y;
+            row_end = row_start + region_roi->roi_rectangle.height;
+
+            col_start = col_start / 16;
+            col_end = (col_end + 15) / 16;
+            row_start = row_start / 16;
+            row_end = (row_end + 15) / 16;
+
+            /* The rectangle comes from the application: keep it inside the
+             * picture so that the row fills never leave qp_per_mb.
+             */
+            BRC_CLIP(col_start, 0, width_in_mbs);
+            BRC_CLIP(col_end, col_start, width_in_mbs);
+            BRC_CLIP(row_start, 0, height_in_mbs);
+            BRC_CLIP(row_end, row_start, height_in_mbs);
+
+            qp_delta = region_roi->roi_value;
+            qp_clip = qp + qp_delta;
+
+            BRC_CLIP(qp_clip, 1, 51);
+
+            for (i = row_start; i < row_end; i++) {
+                qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
+                memset(qp_ptr, qp_clip, (col_end - col_start));
+            }
+        }
+    } else {
+        /*
+         * TODO: Disable it for non CBR-CQP.
+         */
+        vme_context->roi_enabled = 0;
+    }
+
+    if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
+        encoder_context->soft_batch_force = 1;
+
+    return;
+}
+
+/* HEVC */
+/*
+ * Return the index of the entry of ref_list that is temporally closest to
+ * curr_pic on one side, or -1 when there is none.
+ *
+ * dir == 0 looks for pictures with a smaller pic_order_cnt than curr_pic,
+ * dir != 0 for pictures with a larger one.  The scan stops at the first
+ * invalid entry of the list.
+ */
+static int
+hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
+                           VAPictureHEVC *ref_list,
+                           int num_pictures,
+                           int dir)
+{
+    int best_idx = -1;
+    int best_dist = 0x7FFFFFFF;
+    int idx;
+
+    for (idx = 0; idx < num_pictures; idx++) {
+        VAPictureHEVC *cand = &ref_list[idx];
+        int dist;
+
+        /* the first unused slot terminates the list */
+        if ((cand->flags & VA_PICTURE_HEVC_INVALID) ||
+            (cand->picture_id == VA_INVALID_SURFACE))
+            break;
+
+        dist = curr_pic->pic_order_cnt - cand->pic_order_cnt;
+        if (dir)
+            dist = -dist;
+
+        /* only pictures on the requested side (dist > 0) are candidates */
+        if (dist <= 0 || dist >= best_dist)
+            continue;
+
+        best_dist = dist;
+        best_idx = idx;
+    }
+
+    return best_idx;
+}
+/*
+ * Select the reference picture of list `list_index` (0 or 1) for HEVC motion
+ * estimation and bind it through vme_source_surface_state().
+ *
+ * With a single active reference, entry 0 of the slice's list is used and
+ * encode_state->reference_objects[list_index] is the fallback when that
+ * surface is unusable.  Otherwise the temporally closest picture of the list
+ * is chosen.  When no usable surface is found, the used_reference_objects,
+ * used_references and ref_index_in_mb entries of the list are cleared.
+ *
+ * When either bit depth is above 8, the NV12 surface stored in the
+ * surface's private data is bound in place of the surface itself.
+ */
+void
+intel_hevc_vme_reference_state(VADriverContextP ctx,
+                               struct encode_state *encode_state,
+                               struct intel_encoder_context *encoder_context,
+                               int list_index,
+                               int surface_index,
+                               void (* vme_source_surface_state)(
+                                   VADriverContextP ctx,
+                                   int index,
+                                   struct object_surface *obj_surface,
+                                   struct intel_encoder_context *encoder_context))
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    struct object_surface *obj_surface = NULL;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VASurfaceID ref_surface_id;
+    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
+    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
+    int max_num_references;
+    VAPictureHEVC *curr_pic;
+    VAPictureHEVC *ref_list;
+    int ref_idx;
+    unsigned int is_hevc10 = 0;
+    GenHevcSurface *hevc_encoder_surface = NULL;
+
+    if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
+        || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
+        is_hevc10 = 1;
+
+    if (list_index == 0) {
+        max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
+        ref_list = slice_param->ref_pic_list0;
+    } else {
+        max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
+        ref_list = slice_param->ref_pic_list1;
+    }
+
+    if (max_num_references == 1) {
+        if (list_index == 0) {
+            ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
+            vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
+        } else {
+            ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
+            vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
+        }
+
+        if (ref_surface_id != VA_INVALID_SURFACE)
+            obj_surface = SURFACE(ref_surface_id);
+
+        if (!obj_surface ||
+            !obj_surface->bo) {
+            obj_surface = encode_state->reference_objects[list_index];
+            vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
+        }
+
+        ref_idx = 0;
+    } else {
+        curr_pic = &pic_param->decoded_curr_pic;
+
+        /* select the reference frame in temporal space */
+        ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
+
+        /* The search returns -1 when the list holds no picture on the
+         * requested side; ref_list must not be indexed in that case.
+         * obj_surface then stays NULL and the state is cleared below.
+         */
+        if (ref_idx >= 0) {
+            ref_surface_id = ref_list[ref_idx].picture_id;
+
+            if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
+                obj_surface = SURFACE(ref_surface_id);
+
+            vme_context->used_references[list_index] = &ref_list[ref_idx];
+        }
+
+        vme_context->used_reference_objects[list_index] = obj_surface;
+    }
+
+    if (obj_surface &&
+        obj_surface->bo) {
+        assert(ref_idx >= 0);
+        vme_context->used_reference_objects[list_index] = obj_surface;
+
+        if(is_hevc10){
+            hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
+            assert(hevc_encoder_surface);
+            obj_surface = hevc_encoder_surface->nv12_surface_obj;
+        }
+        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
+        /* the same reference index is replicated into all four bytes */
+        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
+                ref_idx << 16 |
+                ref_idx <<  8 |
+                ref_idx);
+    } else {
+        vme_context->used_reference_objects[list_index] = NULL;
+        vme_context->used_references[list_index] = NULL;
+        vme_context->ref_index_in_mb[list_index] = 0;
+    }
+}
+
+/*
+ * Fill the mode and motion-vector cost entries of the VME state message for
+ * the current HEVC frame.
+ *
+ * The QP is pic_init_qp + slice_qp_delta of the first slice, or, in CBR
+ * mode, the QpPrimeY of the rate-control context of the slice type.  In CBR
+ * mode a B slice is handled as a P slice when ip_period is 1 or when
+ * i_frame_number % ip_period is 1.  All costs are derived from
+ * intel_lambda_qp(qp) and packed with intel_format_lutvalue().
+ *
+ * Nothing is written when the VME context has no state message buffer.
+ */
+void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
+                                     struct encode_state *encode_state,
+                                     struct intel_encoder_context *encoder_context)
+{
+    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
+    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
+    int qp, m_cost, j, mv_count;
+    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
+    float   lambda, m_costf;
+
+    /* here no SI SP slice for HEVC, do not need slice fixup */
+    int slice_type = slice_param->slice_type;
+
+
+    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+
+    if(encoder_context->rate_control_mode == VA_RC_CBR)
+    {
+        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
+        if(slice_type == HEVC_SLICE_B) {
+            if(pSequenceParameter->ip_period == 1)
+            {
+                slice_type = HEVC_SLICE_P;
+                qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
+
+            }else if(pSequenceParameter->ip_period != 0 &&
+                     mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
+                /* ip_period is checked first: a zero period would otherwise
+                 * be used as a divisor.
+                 */
+                slice_type = HEVC_SLICE_P;
+                qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
+            }
+        }
+
+    }
+
+    if (vme_state_message == NULL)
+        return;
+
+    assert(qp <= QP_MAX);
+    lambda = intel_lambda_qp(qp);
+    if (slice_type == HEVC_SLICE_I) {
+        vme_state_message[MODE_INTRA_16X16] = 0;
+        m_cost = lambda * 4;
+        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 16;
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 3;
+        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
+    } else {
+        /* motion vector costs: entry 0 is free, the others grow with
+         * log2 of the distance (1, 2, then 4, 8, ... 64)
+         */
+        m_cost = 0;
+        vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
+        for (j = 1; j < 3; j++) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
+        }
+        mv_count = 3;
+        for (j = 4; j <= 64; j *= 2) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
+            mv_count++;
+        }
+
+        /* at low QP fixed mode costs are used instead of lambda-based ones */
+        if (qp <= 25) {
+            vme_state_message[MODE_INTRA_16X16] = 0x4a;
+            vme_state_message[MODE_INTRA_8X8] = 0x4a;
+            vme_state_message[MODE_INTRA_4X4] = 0x4a;
+            vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
+            vme_state_message[MODE_INTER_16X16] = 0x4a;
+            vme_state_message[MODE_INTER_16X8] = 0x4a;
+            vme_state_message[MODE_INTER_8X8] = 0x4a;
+            vme_state_message[MODE_INTER_8X4] = 0x4a;
+            vme_state_message[MODE_INTER_4X4] = 0x4a;
+            vme_state_message[MODE_INTER_BWD] = 0x2a;
+            return;
+        }
+        /* the intra 16x16 cost must be lambda * 10, not the value left in
+         * m_cost by the motion vector loop above
+         */
+        m_cost = lambda * 10;
+        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 14;
+        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 24;
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+        m_costf = lambda * 3.5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
+        if (slice_type == HEVC_SLICE_P) {
+            m_costf = lambda * 2.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 4;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 1.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 3;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
+            /* BWD is not used in P-frame */
+            vme_state_message[MODE_INTER_BWD] = 0;
+        } else {
+            m_costf = lambda * 2.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 5.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 3.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 5.0;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 6.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 1.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
+        }
     }
 }