OSDN Git Service

build: automake options in configure.ac
[android-x86/hardware-intel-common-vaapi.git] / src / gen7_mfd.c
old mode 100755 (executable)
new mode 100644 (file)
index f4ccb12..6805bd8
@@ -51,7 +51,7 @@ static const uint32_t zigzag_direct[64] = {
 };
 
 static void
-gen7_mfd_init_avc_surface(VADriverContextP ctx, 
+gen7_mfd_init_avc_surface(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           struct object_surface *obj_surface)
 {
@@ -65,6 +65,8 @@ gen7_mfd_init_avc_surface(VADriverContextP ctx,
 
     if (!gen7_avc_surface) {
         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
+        assert(gen7_avc_surface);
+        gen7_avc_surface->base.frame_store_id = -1;
         assert((obj_surface->size & 0x3f) == 0);
         obj_surface->private_data = gen7_avc_surface;
     }
@@ -120,7 +122,7 @@ gen7_mfd_pipe_mode_select(VADriverContextP ctx,
                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                   (0 << 1)  |
                   (0 << 0));
-    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
+    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
     OUT_BCS_BATCH(batch, 0); /* reserved */
     ADVANCE_BCS_BATCH(batch);
 }
@@ -135,12 +137,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
     struct object_surface *obj_surface = decode_state->render_object;
     unsigned int y_cb_offset;
     unsigned int y_cr_offset;
+    unsigned int surface_format;
 
     assert(obj_surface);
 
     y_cb_offset = obj_surface->y_cb_offset;
     y_cr_offset = obj_surface->y_cr_offset;
 
+    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+                     MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
     OUT_BCS_BATCH(batch, 0);
@@ -148,7 +154,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
                   ((obj_surface->orig_height - 1) << 18) |
                   ((obj_surface->orig_width - 1) << 4));
     OUT_BCS_BATCH(batch,
-                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+                  (surface_format << 28) | /* monochrome for Y800, otherwise 420 planar YUV */
                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                   (0 << 22) | /* surface object control state, ignored */
                   ((obj_surface->width - 1) << 3) | /* pitch */
@@ -160,7 +166,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
     OUT_BCS_BATCH(batch,
                   (0 << 16) | /* X offset for V(Cr), must be 0 */
-                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
+                  ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
     ADVANCE_BCS_BATCH(batch);
 }
 
@@ -352,12 +358,12 @@ gen7_mfd_avc_img_state(VADriverContextP ctx,
 
     BEGIN_BCS_BATCH(batch, 16);
     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (width_in_mbs * height_in_mbs - 1));
-    OUT_BCS_BATCH(batch, 
-                  ((height_in_mbs - 1) << 16) | 
+    OUT_BCS_BATCH(batch,
+                  ((height_in_mbs - 1) << 16) |
                   ((width_in_mbs - 1) << 0));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
@@ -425,7 +431,7 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx,
     struct object_surface *obj_surface;
     GenAvcSurface *gen7_avc_surface;
     VAPictureH264 *va_pic;
-    int i, j;
+    int i;
 
     BEGIN_BCS_BATCH(batch, 69);
     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
@@ -477,26 +483,14 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx,
 
     /* POC List */
     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
-        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
-            int found = 0;
+        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
 
-            assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
+        if (obj_surface) {
+            const VAPictureH264 * const va_pic = avc_find_picture(
+                                                     obj_surface->base.id, pic_param->ReferenceFrames,
+                                                     ARRAY_ELEMS(pic_param->ReferenceFrames));
 
-            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
-                va_pic = &pic_param->ReferenceFrames[j];
-                
-                if (va_pic->flags & VA_PICTURE_H264_INVALID)
-                    continue;
-
-                if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
-                    found = 1;
-                    break;
-                }
-            }
-
-            assert(found == 1);
-            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
-            
+            assert(va_pic != NULL);
             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
         } else {
@@ -513,6 +507,15 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx,
 }
 
+/*
+ * Emit a phantom slice ahead of the first real slice of a picture.
+ * Thin wrapper: forwards to gen6_mfd_avc_phantom_slice() with this
+ * context's batch buffer. The decode loop calls it only for the first
+ * slice parameter buffer, and only when its first_mb_in_slice is non-zero.
+ * NOTE(review): presumably this covers the macroblocks that precede the
+ * first slice -- confirm against gen6_mfd_avc_phantom_slice().
+ */
 static void
+gen7_mfd_avc_phantom_slice_first(VADriverContextP ctx,
+                                 VAPictureParameterBufferH264 *pic_param,
+                                 VASliceParameterBufferH264 *next_slice_param,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
+}
+
+static void
 gen7_mfd_avc_slice_state(VADriverContextP ctx,
                          VAPictureParameterBufferH264 *pic_param,
                          VASliceParameterBufferH264 *slice_param,
@@ -535,7 +538,7 @@ gen7_mfd_avc_slice_state(VADriverContextP ctx,
     } else if (slice_param->slice_type == SLICE_TYPE_P ||
                slice_param->slice_type == SLICE_TYPE_SP) {
         slice_type = SLICE_TYPE_P;
-    } else { 
+    } else {
         assert(slice_param->slice_type == SLICE_TYPE_B);
         slice_type = SLICE_TYPE_B;
     }
@@ -554,14 +557,20 @@ gen7_mfd_avc_slice_state(VADriverContextP ctx,
         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
     }
 
-    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
-    slice_hor_pos = first_mb_in_slice % width_in_mbs; 
+    first_mb_in_slice = slice_param->first_mb_in_slice;
+    slice_hor_pos = first_mb_in_slice % width_in_mbs;
     slice_ver_pos = first_mb_in_slice / width_in_mbs;
 
+    if (mbaff_picture)
+        slice_ver_pos = slice_ver_pos << 1;
+
     if (next_slice_param) {
-        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
-        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
+        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
+        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
+
+        if (mbaff_picture)
+            next_slice_ver_pos = next_slice_ver_pos << 1;
     } else {
         next_slice_hor_pos = 0;
         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
@@ -570,26 +579,26 @@ gen7_mfd_avc_slice_state(VADriverContextP ctx,
     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
     OUT_BCS_BATCH(batch, slice_type);
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (num_ref_idx_l1 << 24) |
                   (num_ref_idx_l0 << 16) |
                   (slice_param->chroma_log2_weight_denom << 8) |
                   (slice_param->luma_log2_weight_denom << 0));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (slice_param->direct_spatial_mv_pred_flag << 29) |
                   (slice_param->disable_deblocking_filter_idc << 27) |
                   (slice_param->cabac_init_idc << 24) |
                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (slice_ver_pos << 24) |
-                  (slice_hor_pos << 16) | 
+                  (slice_hor_pos << 16) |
                   (first_mb_in_slice << 0));
     OUT_BCS_BATCH(batch,
                   (next_slice_ver_pos << 16) |
                   (next_slice_hor_pos << 0));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (next_slice_param == NULL) << 19); /* last slice flag */
     OUT_BCS_BATCH(batch, 0);
     OUT_BCS_BATCH(batch, 0);
@@ -626,7 +635,7 @@ gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
         num_weight_offset_table = 1;
     }
-    
+
     if ((slice_param->slice_type == SLICE_TYPE_B) &&
         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
         num_weight_offset_table = 2;
@@ -674,15 +683,15 @@ gen7_mfd_avc_bsd_object(VADriverContextP ctx,
     unsigned int slice_data_bit_offset;
 
     slice_data_bit_offset = avc_get_first_mb_bit_offset(
-        slice_data_bo,
-        slice_param,
-        pic_param->pic_fields.bits.entropy_coding_mode_flag
-    );
+                                slice_data_bo,
+                                slice_param,
+                                pic_param->pic_fields.bits.entropy_coding_mode_flag
+                            );
 
     /* the input bitsteam format on GEN7 differs from GEN6 */
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (slice_param->slice_data_size - slice_param->slice_data_offset));
     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
     OUT_BCS_BATCH(batch,
@@ -748,7 +757,8 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx,
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
-    intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
+    intel_update_avc_frame_store_index(ctx, decode_state, pic_param,
+                                       gen7_mfd_context->reference_surface, &gen7_mfd_context->fs_ctx);
     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
@@ -756,20 +766,12 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx,
 
     /* Current decoded picture */
     obj_surface = decode_state->render_object;
-    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
-    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
-
-    /* initial uv component for YUV400 case */
-    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
-         unsigned int uv_offset = obj_surface->width * obj_surface->height;
-         unsigned int uv_size   = obj_surface->width * obj_surface->height / 2;
-
-         drm_intel_gem_bo_map_gtt(obj_surface->bo);
-         memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
-         drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
-    }
+    if (pic_param->pic_fields.bits.reference_pic_flag)
+        obj_surface->flags |= SURFACE_REFERENCED;
+    else
+        obj_surface->flags &= ~SURFACE_REFERENCED;
 
+    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
 
     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
@@ -856,6 +858,9 @@ gen7_mfd_avc_decode_picture(VADriverContextP ctx,
         else
             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
 
+        if (j == 0 && slice_param->first_mb_in_slice)
+            gen7_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
+
         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
             assert((slice_param->slice_type == SLICE_TYPE_I) ||
@@ -960,7 +965,7 @@ gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
-                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
+                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
     OUT_BCS_BATCH(batch,
                   pic_param->picture_coding_type << 9);
@@ -1040,10 +1045,43 @@ gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
     }
 }
 
+/* Byte length of the slice payload, cut at the next 00 00 01 start code prefix if one is found. */
+uint32_t mpeg2_get_slice_data_length(dri_bo *slice_data_bo, VASliceParameterBufferMPEG2 *slice_param)
+{
+    uint8_t *buf;
+    uint32_t buf_offset = slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3);
+    uint32_t buf_size = slice_param->slice_data_size - (slice_param->macroblock_offset >> 3);
+    uint32_t i = 0;
+
+    /* Too short to scan; test before mapping so this return cannot leave the bo mapped. */
+    if (buf_size < 4)
+        return buf_size;
+
+    dri_bo_map(slice_data_bo, 0);
+    buf = (uint8_t *)slice_data_bo->virtual + buf_offset;
+    while (i <= (buf_size - 4)) {
+        if (buf[i + 2] > 1) {                      /* rules out a prefix at i, i+1 and i+2 */
+            i += 3;
+        } else if (buf[i + 1]) {                   /* rules out a prefix at i and i+1 */
+            i += 2;
+        } else if (buf[i] || buf[i + 2] != 1) {    /* rules out a prefix at i only */
+            i++;
+        } else {                                   /* buf[i..i+2] == 00 00 01 */
+            break;
+        }
+    }
+
+    if (i <= (buf_size - 4))
+        buf_size = i;
+    dri_bo_unmap(slice_data_bo);
+    return buf_size;
+}
+
 static void
 gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                           VAPictureParameterBufferMPEG2 *pic_param,
                           VASliceParameterBufferMPEG2 *slice_param,
+                          dri_bo *slice_data_bo,
                           VASliceParameterBufferMPEG2 *next_slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
 {
@@ -1056,7 +1094,7 @@ gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
         is_field_pic = 1;
     is_field_pic_wa = is_field_pic &&
-        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
+                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
 
     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
     hpos0 = slice_param->slice_horizontal_position;
@@ -1073,9 +1111,9 @@ gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
 
     BEGIN_BCS_BATCH(batch, 5);
     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
-    OUT_BCS_BATCH(batch, 
-                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
+                  mpeg2_get_slice_data_length(slice_data_bo, slice_param));
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
     OUT_BCS_BATCH(batch,
                   hpos0 << 24 |
@@ -1137,7 +1175,7 @@ gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
             else
                 next_slice_param = next_slice_group_param;
 
-            gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
+            gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
             slice_param++;
         }
     }
@@ -1146,14 +1184,6 @@ gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
     intel_batchbuffer_flush(batch);
 }
 
-static const int va_to_gen7_vc1_pic_type[5] = {
-    GEN7_VC1_I_PICTURE,
-    GEN7_VC1_P_PICTURE,
-    GEN7_VC1_B_PICTURE,
-    GEN7_VC1_BI_PICTURE,
-    GEN7_VC1_P_PICTURE,
-};
-
 static const int va_to_gen7_vc1_mv[4] = {
     1, /* 1-MV */
     2, /* 1-MV half-pel */
@@ -1165,7 +1195,7 @@ static const int b_picture_scale_factor[21] = {
     128, 85,  170, 64,  192,
     51,  102, 153, 204, 43,
     215, 37,  74,  111, 148,
-    185, 222, 32,  96,  160, 
+    185, 222, 32,  96,  160,
     224,
 };
 
@@ -1175,14 +1205,18 @@ static const int va_to_gen7_vc1_condover[3] = {
     3
 };
 
-static const int va_to_gen7_vc1_profile[4] = {
-    GEN7_VC1_SIMPLE_PROFILE,
-    GEN7_VC1_MAIN_PROFILE,
-    GEN7_VC1_RESERVED_PROFILE,
-    GEN7_VC1_ADVANCED_PROFILE
+/*
+ * Per-field picture type for field-interlaced pictures.
+ * First index: pic_param->picture_fields.bits.picture_type.
+ * Second index: 0 for the first field, 1 for the second field
+ * (callers index with !is_first_field).
+ */
+static const int fptype_to_picture_type[8][2] = {
+    {GEN7_VC1_I_PICTURE, GEN7_VC1_I_PICTURE},
+    {GEN7_VC1_I_PICTURE, GEN7_VC1_P_PICTURE},
+    {GEN7_VC1_P_PICTURE, GEN7_VC1_I_PICTURE},
+    {GEN7_VC1_P_PICTURE, GEN7_VC1_P_PICTURE},
+    {GEN7_VC1_B_PICTURE, GEN7_VC1_B_PICTURE},
+    {GEN7_VC1_B_PICTURE, GEN7_VC1_BI_PICTURE},
+    {GEN7_VC1_BI_PICTURE, GEN7_VC1_B_PICTURE},
+    {GEN7_VC1_BI_PICTURE, GEN7_VC1_BI_PICTURE}
 };
 
-static void 
+static void
 gen7_mfd_free_vc1_surface(void **data)
 {
     struct gen7_vc1_surface *gen7_vc1_surface = *data;
@@ -1190,35 +1224,86 @@ gen7_mfd_free_vc1_surface(void **data)
     if (!gen7_vc1_surface)
         return;
 
-    dri_bo_unreference(gen7_vc1_surface->dmv);
+    dri_bo_unreference(gen7_vc1_surface->dmv_top);
+    dri_bo_unreference(gen7_vc1_surface->dmv_bottom);
     free(gen7_vc1_surface);
     *data = NULL;
 }
 
+/*
+ * Prepare the VC-1 private data attached to obj_surface for the picture
+ * described by pic_param: allocate it on first use, clear the per-field
+ * state unless this is the second field of a field-interlaced picture,
+ * record the picture type of the field(s) being decoded, and make sure
+ * the direct MV buffer(s) exist.
+ */
 static void
-gen7_mfd_init_vc1_surface(VADriverContextP ctx, 
+gen7_mfd_init_vc1_surface(VADriverContextP ctx,
                           VAPictureParameterBufferVC1 *pic_param,
                           struct object_surface *obj_surface)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
-    int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
-    int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
+    int height_in_mbs;
+    int picture_type;
+    int is_first_field = 1;
+
+    /* Field-interlaced pictures carry one type per field; pick the one for the field being decoded */
+    if (!pic_param->sequence_fields.bits.interlace ||
+        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
+        picture_type = pic_param->picture_fields.bits.picture_type;
+    } else {/* Field-Interlace */
+        is_first_field = pic_param->picture_fields.bits.is_first_field;
+        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
+    }
 
     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
 
+    /* First use of this surface: attach zero-initialized private data */
     if (!gen7_vc1_surface) {
         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
+        assert(gen7_vc1_surface);
         assert((obj_surface->size & 0x3f) == 0);
         obj_surface->private_data = gen7_vc1_surface;
     }
 
-    gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
+    /* Not a second field: clear the picture types and intensity compensation state recorded earlier */
+    if (!pic_param->sequence_fields.bits.interlace ||
+        pic_param->picture_fields.bits.frame_coding_mode < 2 || /* Progressive or Frame-Interlace */
+        is_first_field) {
+        gen7_vc1_surface->picture_type_top = 0;
+        gen7_vc1_surface->picture_type_bottom = 0;
+        gen7_vc1_surface->intensity_compensation_top = 0;
+        gen7_vc1_surface->intensity_compensation_bottom = 0;
+        gen7_vc1_surface->luma_scale_top[0] = 0;
+        gen7_vc1_surface->luma_scale_top[1] = 0;
+        gen7_vc1_surface->luma_scale_bottom[0] = 0;
+        gen7_vc1_surface->luma_scale_bottom[1] = 0;
+        gen7_vc1_surface->luma_shift_top[0] = 0;
+        gen7_vc1_surface->luma_shift_top[1] = 0;
+        gen7_vc1_surface->luma_shift_bottom[0] = 0;
+        gen7_vc1_surface->luma_shift_bottom[1] = 0;
+    }
+
+    /*
+     * Record the picture type for both fields, or only for the field being
+     * decoded: top_field_first ^ is_first_field is non-zero exactly when
+     * the current field is the bottom one.
+     */
+    if (!pic_param->sequence_fields.bits.interlace ||
+        pic_param->picture_fields.bits.frame_coding_mode < 2) { /* Progressive or Frame-Interlace */
+        gen7_vc1_surface->picture_type_top = picture_type;
+        gen7_vc1_surface->picture_type_bottom = picture_type;
+    } else if (pic_param->picture_fields.bits.top_field_first ^ is_first_field)
+        gen7_vc1_surface->picture_type_bottom = picture_type;
+    else
+        gen7_vc1_surface->picture_type_top = picture_type;
+
+    /*
+     * The Direct MV buffer is scalable with frame height, but
+     * does not scale with frame width as the hardware assumes
+     * that frame width is fixed at 128 MBs.
+     */
 
-    if (gen7_vc1_surface->dmv == NULL) {
-        gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
+    if (gen7_vc1_surface->dmv_top == NULL) {
+        height_in_mbs = ALIGN(obj_surface->orig_height, 16) / 16;
+        gen7_vc1_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
                                              "direct mv w/r buffer",
-                                             width_in_mbs * height_in_mbs * 64,
+                                             128 * height_in_mbs * 64,
+                                             0x1000);
+    }
+
+    /* Interlaced sequences get a second buffer, sized from the height rounded up to 32 and divided by 32 */
+    if (pic_param->sequence_fields.bits.interlace &&
+        gen7_vc1_surface->dmv_bottom == NULL) {
+        height_in_mbs = ALIGN(obj_surface->orig_height, 32) / 32;
+        gen7_vc1_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
+                                             "direct mv w/r buffer",
+                                             128 * height_in_mbs * 64,
                                              0x1000);
     }
 }
@@ -1231,19 +1316,25 @@ gen7_mfd_vc1_decode_init(VADriverContextP ctx,
     VAPictureParameterBufferVC1 *pic_param;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface;
+    struct gen7_vc1_surface *gen7_vc1_current_surface;
+    struct gen7_vc1_surface *gen7_vc1_forward_surface;
     dri_bo *bo;
     int width_in_mbs;
     int picture_type;
+    int is_first_field = 1;
+    int i;
+
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
-    picture_type = pic_param->picture_fields.bits.picture_type;
-    intel_update_vc1_frame_store_index(ctx,
-                                       decode_state,
-                                       pic_param,
-                                       gen7_mfd_context->reference_surface);
+
+    if (!pic_param->sequence_fields.bits.interlace ||
+        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
+        picture_type = pic_param->picture_fields.bits.picture_type;
+    } else {/* Field-Interlace */
+        is_first_field = pic_param->picture_fields.bits.is_first_field;
+        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
+    }
 
     /* Current decoded picture */
     obj_surface = decode_state->render_object;
@@ -1253,12 +1344,148 @@ gen7_mfd_vc1_decode_init(VADriverContextP ctx,
     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
-    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
 
     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
-    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
+
+    if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
+        gen7_mfd_context->post_deblocking_output.valid = 0;
+        gen7_mfd_context->pre_deblocking_output.valid = 1;
+    } else {
+        gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
+        gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
+    }
+
+    intel_update_vc1_frame_store_index(ctx,
+                                       decode_state,
+                                       pic_param,
+                                       gen7_mfd_context->reference_surface);
+
+    if (picture_type == GEN7_VC1_P_PICTURE) {
+        obj_surface = decode_state->reference_objects[0];
+        gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
+        if (pic_param->forward_reference_picture != VA_INVALID_ID &&
+            obj_surface)
+            gen7_vc1_forward_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
+        else
+            gen7_vc1_forward_surface = NULL;
+
+        if (!pic_param->sequence_fields.bits.interlace ||
+            pic_param->picture_fields.bits.frame_coding_mode == 0) { /* Progressive */
+            if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
+                if (gen7_vc1_forward_surface) {
+                    gen7_vc1_forward_surface->intensity_compensation_top = 1;
+                    gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
+                    gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
+                    gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
+                    gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
+                    gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
+                }
+            }
+        } else if (pic_param->sequence_fields.bits.interlace &&
+            pic_param->picture_fields.bits.frame_coding_mode == 1) { /* Frame-Interlace */
+            if (pic_param->picture_fields.bits.intensity_compensation) {
+                if (gen7_vc1_forward_surface) {
+                    gen7_vc1_forward_surface->intensity_compensation_top = 1;
+                    gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
+                    gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
+                    gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
+                    gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
+                    gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
+                }
+            }
+        } else if (pic_param->sequence_fields.bits.interlace &&
+                   pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
+            if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
+                if (pic_param->intensity_compensation_field == 1 || /* Top field */
+                    pic_param->intensity_compensation_field == 0) { /* Both fields */
+                    if (is_first_field) {
+                        if ((!pic_param->reference_fields.bits.num_reference_pictures &&
+                             (pic_param->reference_fields.bits.reference_field_pic_indicator ==
+                             pic_param->picture_fields.bits.top_field_first)) ||
+                            pic_param->reference_fields.bits.num_reference_pictures) {
+                            if (gen7_vc1_forward_surface) {
+                                i = gen7_vc1_forward_surface->intensity_compensation_top++;
+                                gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
+                                gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
+                            }
+                        }
+                    } else { /* Second field */
+                        if (pic_param->picture_fields.bits.top_field_first) {
+                            if ((!pic_param->reference_fields.bits.num_reference_pictures &&
+                                 !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
+                                pic_param->reference_fields.bits.num_reference_pictures) {
+                                i = gen7_vc1_current_surface->intensity_compensation_top++;
+                                gen7_vc1_current_surface->luma_scale_top[i] = pic_param->luma_scale;
+                                gen7_vc1_current_surface->luma_shift_top[i] = pic_param->luma_shift;
+                            }
+                        } else {
+                            if ((!pic_param->reference_fields.bits.num_reference_pictures &&
+                                 pic_param->reference_fields.bits.reference_field_pic_indicator) ||
+                                pic_param->reference_fields.bits.num_reference_pictures) {
+                                if (gen7_vc1_forward_surface) {
+                                    i = gen7_vc1_forward_surface->intensity_compensation_top++;
+                                    gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
+                                    gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
+                                }
+                            }
+                        }
+                    }
+                }
+                if (pic_param->intensity_compensation_field == 2 || /* Bottom field */
+                    pic_param->intensity_compensation_field == 0) { /* Both fields */
+                    if (is_first_field) {
+                        if ((!pic_param->reference_fields.bits.num_reference_pictures &&
+                             (pic_param->reference_fields.bits.reference_field_pic_indicator ^
+                              pic_param->picture_fields.bits.top_field_first)) ||
+                            pic_param->reference_fields.bits.num_reference_pictures) {
+                            if (gen7_vc1_forward_surface) {
+                                i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
+                                if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
+                                    gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
+                                    gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
+                                } else { /* Both fields */
+                                    gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
+                                    gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
+                                }
+                            }
+                        }
+                    } else { /* Second field */
+                        if (pic_param->picture_fields.bits.top_field_first) {
+                            if ((!pic_param->reference_fields.bits.num_reference_pictures &&
+                                 pic_param->reference_fields.bits.reference_field_pic_indicator) ||
+                                pic_param->reference_fields.bits.num_reference_pictures) {
+                                if (gen7_vc1_forward_surface) {
+                                    i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
+                                    if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
+                                        gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
+                                        gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
+                                    } else { /* Both fields */
+                                        gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
+                                        gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
+                                    }
+                                }
+                            }
+                        } else {
+                           if ((!pic_param->reference_fields.bits.num_reference_pictures &&
+                                 !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
+                                pic_param->reference_fields.bits.num_reference_pictures) {
+                                i = gen7_vc1_current_surface->intensity_compensation_bottom++;
+                               if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
+                                   gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale;
+                                   gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift;
+                                } else { /* Both fields */
+                                    gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
+                                    gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
 
     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
@@ -1289,18 +1516,24 @@ gen7_mfd_vc1_decode_init(VADriverContextP ctx,
 
     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
 
-    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
+    if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
+        gen7_mfd_context->bitplane_read_buffer.valid = 1;
+    else
+        gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
-    
+
     if (gen7_mfd_context->bitplane_read_buffer.valid) {
         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
-        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
+        int height_in_mbs;
         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
         int src_w, src_h;
         uint8_t *src = NULL, *dst = NULL;
 
-        assert(decode_state->bit_plane->buffer);
-        src = decode_state->bit_plane->buffer;
+        if (!pic_param->sequence_fields.bits.interlace ||
+            (pic_param->picture_fields.bits.frame_coding_mode < 2)) /* Progressive or Frame-Interlace */
+            height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
+        else /* Field-Interlace */
+            height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
 
         bo = dri_bo_alloc(i965->intel.bufmgr,
                           "VC-1 Bitplane",
@@ -1313,28 +1546,44 @@ gen7_mfd_vc1_decode_init(VADriverContextP ctx,
         assert(bo->virtual);
         dst = bo->virtual;
 
-        for (src_h = 0; src_h < height_in_mbs; src_h++) {
-            for(src_w = 0; src_w < width_in_mbs; src_w++) {
-                int src_index, dst_index;
-                int src_shift;
-                uint8_t src_value;
-
-                src_index = (src_h * width_in_mbs + src_w) / 2;
-                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
-                src_value = ((src[src_index] >> src_shift) & 0xf);
+        if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
+            for (src_h = 0; src_h < height_in_mbs; src_h++) {
+                for (src_w = 0; src_w < width_in_mbs; src_w++) {
+                    int dst_index;
+                    uint8_t src_value = 0x2;
 
-                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
-                    src_value |= 0x2;
+                    dst_index = src_w / 2;
+                    dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
                 }
 
-                dst_index = src_w / 2;
-                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
+                if (src_w & 1)
+                    dst[src_w / 2] >>= 4;
+
+                dst += bitplane_width;
             }
+        } else {
+            assert(decode_state->bit_plane->buffer);
+            src = decode_state->bit_plane->buffer;
 
-            if (src_w & 1)
-                dst[src_w / 2] >>= 4;
+            for (src_h = 0; src_h < height_in_mbs; src_h++) {
+                for (src_w = 0; src_w < width_in_mbs; src_w++) {
+                    int src_index, dst_index;
+                    int src_shift;
+                    uint8_t src_value;
 
-            dst += bitplane_width;
+                    src_index = (src_h * width_in_mbs + src_w) / 2;
+                    src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
+                    src_value = ((src[src_index] >> src_shift) & 0xf);
+
+                    dst_index = src_w / 2;
+                    dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
+                }
+
+                if (src_w & 1)
+                    dst[src_w / 2] >>= 4;
+
+                dst += bitplane_width;
+            }
         }
 
         dri_bo_unmap(bo);
@@ -1350,24 +1599,44 @@ gen7_mfd_vc1_pic_state(VADriverContextP ctx,
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     VAPictureParameterBufferVC1 *pic_param;
     struct object_surface *obj_surface;
+    struct gen7_vc1_surface *gen7_vc1_current_surface;
+    struct gen7_vc1_surface *gen7_vc1_reference_surface;
     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
-    int unified_mv_mode;
+    int unified_mv_mode = 0;
     int ref_field_pic_polarity = 0;
     int scale_factor = 0;
     int trans_ac_y = 0;
     int dmv_surface_valid = 0;
+    int frfd = 0;
     int brfd = 0;
     int fcm = 0;
     int picture_type;
-    int profile;
-    int overlap;
+    int ptype;
+    int overlap = 0;
     int interpolation_mode = 0;
+    int height_in_mbs;
+    int is_first_field = 1;
+    int loopfilter = 0;
+    int bitplane_present;
+    int range_reduction = 0;
+    int range_reduction_scale = 0;
+    int forward_mb = 0, mv_type_mb = 0, skip_mb = 0, direct_mb = 0;
+    int overflags = 0, ac_pred = 0, field_tx = 0;
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
 
-    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
+    if (!pic_param->sequence_fields.bits.interlace ||
+        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
+        picture_type = pic_param->picture_fields.bits.picture_type;
+        height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
+    } else {/* Field-Interlace */
+        is_first_field = pic_param->picture_fields.bits.is_first_field;
+        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
+        height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
+    }
+
     dquant = pic_param->pic_quantizer_fields.bits.dquant;
     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
@@ -1403,7 +1672,7 @@ gen7_mfd_vc1_pic_state(VADriverContextP ctx,
                     alt_pquant_edge_mask = 0;
                 }
                 break;
-                
+
             case 0:
                 alt_pquant_edge_mask = 0xf;
                 break;
@@ -1426,29 +1695,85 @@ gen7_mfd_vc1_pic_state(VADriverContextP ctx,
         }
     }
 
-    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
-        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
-        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
-    } else {
-        assert(pic_param->mv_fields.bits.mv_mode < 4);
-        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
+    if (pic_param->sequence_fields.bits.profile == 1 && /* Main Profile */
+        pic_param->sequence_fields.bits.rangered) {
+        obj_surface = decode_state->reference_objects[0];
+
+        gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
+
+        if (pic_param->forward_reference_picture != VA_INVALID_ID &&
+            obj_surface)
+            gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
+        else
+            gen7_vc1_reference_surface = NULL;
+
+        if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
+            if (gen7_vc1_reference_surface)
+                gen7_vc1_current_surface->range_reduction_frame = gen7_vc1_reference_surface->range_reduction_frame;
+            else
+                gen7_vc1_current_surface->range_reduction_frame = 0;
+        else
+            gen7_vc1_current_surface->range_reduction_frame = pic_param->range_reduction_frame;
+
+        if (gen7_vc1_reference_surface) {
+            if (gen7_vc1_current_surface->range_reduction_frame &&
+                !gen7_vc1_reference_surface->range_reduction_frame) {
+                range_reduction = 1;
+                range_reduction_scale = 0;
+            } else if (!gen7_vc1_current_surface->range_reduction_frame &&
+                       gen7_vc1_reference_surface->range_reduction_frame) {
+                range_reduction = 1;
+                range_reduction_scale = 1;
+            }
+        }
     }
 
-    if (pic_param->sequence_fields.bits.interlace == 1 &&
-        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
-        /* FIXME: calculate reference field picture polarity */
-        assert(0);
-        ref_field_pic_polarity = 0;
+    if ((!pic_param->sequence_fields.bits.interlace ||
+         pic_param->picture_fields.bits.frame_coding_mode != 1) && /* Progressive or Field-Interlace */
+        (picture_type == GEN7_VC1_P_PICTURE ||
+         picture_type == GEN7_VC1_B_PICTURE)) {
+        if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
+            assert(pic_param->mv_fields.bits.mv_mode2 < 4);
+            unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
+        } else {
+            assert(pic_param->mv_fields.bits.mv_mode < 4);
+            unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
+        }
+    }
+
+    if (pic_param->sequence_fields.bits.interlace &&
+        pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
+        picture_type == GEN7_VC1_P_PICTURE &&
+        !pic_param->reference_fields.bits.num_reference_pictures) {
+        if (pic_param->reference_fields.bits.reference_field_pic_indicator == 0) {
+            ref_field_pic_polarity = is_first_field ?
+                                        pic_param->picture_fields.bits.top_field_first :
+                                        !pic_param->picture_fields.bits.top_field_first;
+        } else {
+            ref_field_pic_polarity = is_first_field ?
+                                        !pic_param->picture_fields.bits.top_field_first :
+                                        pic_param->picture_fields.bits.top_field_first;
+        }
     }
 
     if (pic_param->b_picture_fraction < 21)
         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
 
-    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
-    
-    if (profile == GEN7_VC1_ADVANCED_PROFILE && 
-        picture_type == GEN7_VC1_I_PICTURE)
-        picture_type = GEN7_VC1_BI_PICTURE;
+    if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
+        ptype = GEN7_VC1_P_PICTURE;
+        bitplane_present = 1;
+    } else {
+        ptype = pic_param->picture_fields.bits.picture_type;
+        bitplane_present = !!(pic_param->bitplane_present.value & 0x7f);
+        forward_mb = pic_param->raw_coding.flags.forward_mb;
+        mv_type_mb = pic_param->raw_coding.flags.mv_type_mb;
+        skip_mb = pic_param->raw_coding.flags.skip_mb;
+        direct_mb = pic_param->raw_coding.flags.direct_mb;
+        overflags = pic_param->raw_coding.flags.overflags;
+        ac_pred = pic_param->raw_coding.flags.ac_pred;
+        field_tx = pic_param->raw_coding.flags.field_tx;
+        loopfilter = pic_param->entrypoint_fields.bits.loopfilter;
+    }
 
     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
@@ -1466,83 +1791,100 @@ gen7_mfd_vc1_pic_state(VADriverContextP ctx,
         }
     }
 
-
     if (picture_type == GEN7_VC1_B_PICTURE) {
-        struct gen7_vc1_surface *gen7_vc1_surface = NULL;
-
         obj_surface = decode_state->reference_objects[1];
 
-        if (obj_surface)
-            gen7_vc1_surface = obj_surface->private_data;
-
-        if (!gen7_vc1_surface || 
-            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
-             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
-            dmv_surface_valid = 0;
+        if (pic_param->backward_reference_picture != VA_INVALID_ID &&
+            obj_surface)
+            gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
         else
-            dmv_surface_valid = 1;
+            gen7_vc1_reference_surface = NULL;
+
+        if (gen7_vc1_reference_surface) {
+            if (pic_param->sequence_fields.bits.interlace &&
+                pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
+                pic_param->picture_fields.bits.top_field_first ^ is_first_field) {
+                if (gen7_vc1_reference_surface->picture_type_bottom == GEN7_VC1_P_PICTURE)
+                    dmv_surface_valid = 1;
+            } else if (gen7_vc1_reference_surface->picture_type_top == GEN7_VC1_P_PICTURE)
+                dmv_surface_valid = 1;
+        }
     }
 
     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
 
-    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
-        fcm = pic_param->picture_fields.bits.frame_coding_mode;
-    else {
-        if (pic_param->picture_fields.bits.top_field_first)
-            fcm = 2;
+    if (pic_param->sequence_fields.bits.interlace) {
+        if (pic_param->picture_fields.bits.frame_coding_mode < 2)
+            fcm = pic_param->picture_fields.bits.frame_coding_mode;
+        else if (!pic_param->picture_fields.bits.top_field_first)
+            fcm = 3; /* Field with bottom field first */
         else
-            fcm = 3;
+            fcm = 2; /* Field with top field first */
     }
 
-    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
-        brfd = pic_param->reference_fields.bits.reference_distance;
-        brfd = (scale_factor * brfd) >> 8;
-        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
+    if (pic_param->sequence_fields.bits.interlace &&
+        pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
+        if (picture_type == GEN7_VC1_I_PICTURE ||
+             picture_type == GEN7_VC1_P_PICTURE) {
+            gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
 
-        if (brfd < 0)
-            brfd = 0;
-    }
+            if (is_first_field)
+                gen7_vc1_current_surface->reference_distance = pic_param->reference_fields.bits.reference_distance;
 
-    overlap = 0;
-    if (profile != GEN7_VC1_ADVANCED_PROFILE){
-        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
-            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
-            overlap = 1; 
-        }
-    }else {
-        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
-             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
-              overlap = 1; 
-        }
-        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
-            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
-             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
-                overlap = 1; 
-             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
-                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
-                 overlap = 1;
-             }
+            frfd = gen7_vc1_current_surface->reference_distance;
+        } else if (picture_type == GEN7_VC1_B_PICTURE) {
+            obj_surface = decode_state->reference_objects[1];
+
+            if (pic_param->backward_reference_picture != VA_INVALID_ID &&
+                obj_surface)
+                gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
+            else
+                gen7_vc1_reference_surface = NULL;
+
+            if (gen7_vc1_reference_surface) {
+                frfd = (scale_factor * gen7_vc1_reference_surface->reference_distance) >> 8;
+
+                brfd = gen7_vc1_reference_surface->reference_distance - frfd - 1;
+                if (brfd < 0)
+                    brfd = 0;
+            }
         }
-    } 
+    }
 
-    assert(pic_param->conditional_overlap_flag < 3);
-    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
+    if (pic_param->sequence_fields.bits.overlap) {
+        if (pic_param->sequence_fields.bits.profile == 3) { /* Advanced Profile */
+            if (picture_type == GEN7_VC1_P_PICTURE &&
+                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
+                overlap = 1;
+            }
+            if (picture_type == GEN7_VC1_I_PICTURE ||
+                picture_type == GEN7_VC1_BI_PICTURE) {
+                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
+                    overlap = 1;
+                } else if (pic_param->conditional_overlap_flag == 1 || /* all block boundaries */
+                           pic_param->conditional_overlap_flag == 2) { /* coded by OVERFLAGSMB bitplane */
+                    overlap = 1;
+                }
+            }
+        } else {
+            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
+                picture_type != GEN7_VC1_B_PICTURE) {
+                overlap = 1;
+            }
+        }
+    }
 
     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
-        interpolation_mode = 9; /* Half-pel bilinear */
-    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
-             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
-              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
-        interpolation_mode = 1; /* Half-pel bicubic */
+        interpolation_mode = 8 | pic_param->fast_uvmc_flag;
     else
-        interpolation_mode = 0; /* Quarter-pel bicubic */
+        interpolation_mode = 0 | pic_param->fast_uvmc_flag;
 
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
     OUT_BCS_BATCH(batch,
-                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
+                  ((height_in_mbs - 1) << 16) |
                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
     OUT_BCS_BATCH(batch,
                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
@@ -1551,15 +1893,15 @@ gen7_mfd_vc1_pic_state(VADriverContextP ctx,
                   pic_param->rounding_control << 13 |
                   pic_param->sequence_fields.bits.syncmarker << 12 |
                   interpolation_mode << 8 |
-                  0 << 7 | /* FIXME: scale up or down ??? */
-                  pic_param->range_reduction_frame << 6 |
-                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
+                  range_reduction_scale << 7 |
+                  range_reduction << 6 |
+                  loopfilter << 5 |
                   overlap << 4 |
-                  !pic_param->picture_fields.bits.is_first_field << 3 |
-                  (pic_param->sequence_fields.bits.profile == 3) << 0);
+                  !is_first_field << 3 |
+                  (pic_param->sequence_fields.bits.profile == 3) << 0); /* Advanced Profile */
     OUT_BCS_BATCH(batch,
                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
-                  picture_type << 26 |
+                  ptype << 26 |
                   fcm << 24 |
                   alt_pq << 16 |
                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
@@ -1570,27 +1912,27 @@ gen7_mfd_vc1_pic_state(VADriverContextP ctx,
                   pic_param->fast_uvmc_flag << 26 |
                   ref_field_pic_polarity << 25 |
                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
-                  pic_param->reference_fields.bits.reference_distance << 20 |
-                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
+                  brfd << 20 |
+                  frfd << 16 |
                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
                   pic_param->mv_fields.bits.extended_mv_range << 8 |
                   alt_pquant_edge_mask << 4 |
                   alt_pquant_config << 2 |
-                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
+                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
     OUT_BCS_BATCH(batch,
-                  !!pic_param->bitplane_present.value << 31 |
-                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
-                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
-                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
-                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
-                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
-                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
-                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
+                  bitplane_present << 31 |
+                  forward_mb << 30 |
+                  mv_type_mb << 29 |
+                  skip_mb << 28 |
+                  direct_mb << 27 |
+                  overflags << 26 |
+                  ac_pred << 25 |
+                  field_tx << 24 |
                   pic_param->mv_fields.bits.mv_table << 20 |
                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
-                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
+                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                   pic_param->mb_mode_table << 8 |
                   trans_ac_y << 6 |
@@ -1607,30 +1949,140 @@ gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     VAPictureParameterBufferVC1 *pic_param;
-    int intensitycomp_single;
+    struct gen7_vc1_surface *gen7_vc1_top_surface;
+    struct gen7_vc1_surface *gen7_vc1_bottom_surface;
+    int picture_type;
+    int is_first_field = 1;
+    int intensitycomp_single_fwd = 0;
+    int intensitycomp_single_bwd = 0;
+    int intensitycomp_double_fwd = 0;
+    int lumscale1_single_fwd = 0;
+    int lumscale2_single_fwd = 0;
+    int lumshift1_single_fwd = 0;
+    int lumshift2_single_fwd = 0;
+    int lumscale1_single_bwd = 0;
+    int lumscale2_single_bwd = 0;
+    int lumshift1_single_bwd = 0;
+    int lumshift2_single_bwd = 0;
+    int lumscale1_double_fwd = 0;
+    int lumscale2_double_fwd = 0;
+    int lumshift1_double_fwd = 0;
+    int lumshift2_double_fwd = 0;
+    int replication_mode = 0;
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
 
-    assert(decode_state->pic_param && decode_state->pic_param->buffer);
-    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
-    intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
+    if (!pic_param->sequence_fields.bits.interlace ||
+        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
+        picture_type = pic_param->picture_fields.bits.picture_type;
+    } else {/* Field-Interlace */
+        is_first_field = pic_param->picture_fields.bits.is_first_field;
+        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
+    }
+
+    if (picture_type == GEN7_VC1_P_PICTURE ||
+        picture_type == GEN7_VC1_B_PICTURE) {
+        if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID)
+            gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[0].obj_surface->private_data);
+        else
+            gen7_vc1_top_surface = NULL;
+
+        if (gen7_vc1_top_surface) {
+            intensitycomp_single_fwd = !!gen7_vc1_top_surface->intensity_compensation_top;
+            lumscale1_single_fwd = gen7_vc1_top_surface->luma_scale_top[0];
+            lumshift1_single_fwd = gen7_vc1_top_surface->luma_shift_top[0];
+            if (gen7_vc1_top_surface->intensity_compensation_top == 2) {
+                intensitycomp_double_fwd = 1;
+                lumscale1_double_fwd = gen7_vc1_top_surface->luma_scale_top[1];
+                lumshift1_double_fwd = gen7_vc1_top_surface->luma_shift_top[1];
+            }
+        }
+
+        if (pic_param->sequence_fields.bits.interlace &&
+            pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
+            if (gen7_mfd_context->reference_surface[2].surface_id != VA_INVALID_ID)
+                gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[2].obj_surface->private_data);
+            else
+                gen7_vc1_bottom_surface = NULL;
+
+            if (gen7_vc1_bottom_surface) {
+                intensitycomp_single_fwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
+                lumscale2_single_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
+                lumshift2_single_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
+                if (gen7_vc1_bottom_surface->intensity_compensation_bottom == 2) {
+                    intensitycomp_double_fwd |= 2;
+                    lumscale2_double_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[1];
+                    lumshift2_double_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[1];
+                }
+            }
+        }
+    }
+
+    if (picture_type == GEN7_VC1_B_PICTURE) {
+        if (gen7_mfd_context->reference_surface[1].surface_id != VA_INVALID_ID)
+            gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[1].obj_surface->private_data);
+        else
+            gen7_vc1_top_surface = NULL;
+
+        if (gen7_vc1_top_surface) {
+            intensitycomp_single_bwd = !!gen7_vc1_top_surface->intensity_compensation_top;
+            lumscale1_single_bwd = gen7_vc1_top_surface->luma_scale_top[0];
+            lumshift1_single_bwd = gen7_vc1_top_surface->luma_shift_top[0];
+        }
+
+        if (pic_param->sequence_fields.bits.interlace &&
+            pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
+            if (gen7_mfd_context->reference_surface[3].surface_id != VA_INVALID_ID)
+                gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[3].obj_surface->private_data);
+            else
+                gen7_vc1_bottom_surface = NULL;
+
+            if (gen7_vc1_bottom_surface) {
+                intensitycomp_single_bwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
+                lumscale2_single_bwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
+                lumshift2_single_bwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
+            }
+        }
+    }
+
+    if (pic_param->sequence_fields.bits.interlace &&
+        pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
+        if (picture_type == GEN7_VC1_P_PICTURE)
+            replication_mode = 0x5;
+        else if (picture_type == GEN7_VC1_B_PICTURE)
+            replication_mode = 0xf;
+    }
 
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
     OUT_BCS_BATCH(batch,
-                  0 << 14 | /* FIXME: double ??? */
+                  intensitycomp_double_fwd << 14 |
                   0 << 12 |
-                  intensitycomp_single << 10 |
-                  intensitycomp_single << 8 |
-                  0 << 4 | /* FIXME: interlace mode */
+                  intensitycomp_single_fwd << 10 |
+                  intensitycomp_single_bwd << 8 |
+                  replication_mode << 4 |
                   0);
     OUT_BCS_BATCH(batch,
-                  pic_param->luma_shift << 16 |
-                  pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+                  lumshift2_single_fwd << 24 |
+                  lumshift1_single_fwd << 16 |
+                  lumscale2_single_fwd << 8 |
+                  lumscale1_single_fwd << 0);
+    OUT_BCS_BATCH(batch,
+                  lumshift2_double_fwd << 24 |
+                  lumshift1_double_fwd << 16 |
+                  lumscale2_double_fwd << 8 |
+                  lumscale1_double_fwd << 0);
+    OUT_BCS_BATCH(batch,
+                  lumshift2_single_bwd << 24 |
+                  lumshift1_single_bwd << 16 |
+                  lumscale2_single_bwd << 8 |
+                  lumscale1_single_bwd << 0);
+    OUT_BCS_BATCH(batch,
+                  0 << 24 |
+                  0 << 16 |
+                  0 << 8 |
+                  0 << 0);
     ADVANCE_BCS_BATCH(batch);
 }
 
@@ -1641,19 +2093,47 @@ gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
                               struct gen7_mfd_context *gen7_mfd_context)
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
     struct object_surface *obj_surface;
     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
+    int picture_type;
+    int is_first_field = 1;
 
-    obj_surface = decode_state->render_object;
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
 
-    if (obj_surface && obj_surface->private_data) {
-        dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
+    if (!pic_param->sequence_fields.bits.interlace ||
+        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
+        picture_type = pic_param->picture_fields.bits.picture_type;
+    } else {/* Field-Interlace */
+        is_first_field = pic_param->picture_fields.bits.is_first_field;
+        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
     }
 
-    obj_surface = decode_state->reference_objects[1];
+    if (picture_type == GEN7_VC1_P_PICTURE ||
+        picture_type == GEN7_VC1_SKIPPED_PICTURE) {
+        obj_surface = decode_state->render_object;
+
+        if (pic_param->sequence_fields.bits.interlace &&
+            (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
+            (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
+            dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
+        else
+            dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
+    }
 
-    if (obj_surface && obj_surface->private_data) {
-        dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
+    if (picture_type == GEN7_VC1_B_PICTURE) {
+        obj_surface = decode_state->reference_objects[1];
+        if (pic_param->backward_reference_picture != VA_INVALID_ID &&
+            obj_surface &&
+            obj_surface->private_data) {
+
+            if (pic_param->sequence_fields.bits.interlace &&
+                (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
+                (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
+                dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
+            else
+                dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
+        }
     }
 
     BEGIN_BCS_BATCH(batch, 3);
@@ -1672,7 +2152,7 @@ gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
                       0);
     else
         OUT_BCS_BATCH(batch, 0);
-                  
+
     ADVANCE_BCS_BATCH(batch);
 }
 
@@ -1683,17 +2163,23 @@ gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offse
     int slice_header_size = in_slice_data_bit_offset / 8;
     int i, j;
 
-    if (profile != 3)
-        out_slice_data_bit_offset = in_slice_data_bit_offset;
-    else {
-        for (i = 0, j = 0; i < slice_header_size; i++, j++) {
+    if (profile == 3 && slice_header_size) { /* Advanced Profile */
+        for (i = 0, j = 0; i < slice_header_size - 1; i++, j++)
+            if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4)
+                    i++, j += 2;
+
+        if (i == slice_header_size - 1) {
             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
-                i++, j += 2;
+                buf[j + 2] = 0;
+                j++;
             }
+
+            j++;
         }
 
         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-    }
+    } else /* Simple or Main Profile */
+        out_slice_data_bit_offset = in_slice_data_bit_offset;
 
     return out_slice_data_bit_offset;
 }
@@ -1711,23 +2197,26 @@ gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
     int macroblock_offset;
     uint8_t *slice_data = NULL;
 
-    dri_bo_map(slice_data_bo, 0);
+    dri_bo_map(slice_data_bo, True);
     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
-    macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data, 
+    macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                                slice_param->macroblock_offset,
                                                                pic_param->sequence_fields.bits.profile);
     dri_bo_unmap(slice_data_bo);
 
     if (next_slice_param)
         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
-    else
+    else if (!pic_param->sequence_fields.bits.interlace ||
+             pic_param->picture_fields.bits.frame_coding_mode < 2) /* Progressive or Frame-Interlace */
         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
+    else /* Field-Interlace */
+        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 32) / 32;
 
     BEGIN_BCS_BATCH(batch, 5);
     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_data_size - (macroblock_offset >> 3));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_data_offset + (macroblock_offset >> 3));
     OUT_BCS_BATCH(batch,
                   slice_param->slice_vertical_position << 16 |
@@ -1813,32 +2302,24 @@ gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
         int v2 = pic_param->components[1].v_sampling_factor;
         int v3 = pic_param->components[2].v_sampling_factor;
 
-        if (h1 == 2 && h2 == 1 && h3 == 1 &&
-            v1 == 2 && v2 == 1 && v3 == 1) {
+        if (h1 == 2 * h2 && h2 == h3 &&
+            v1 == 2 * v2 && v2 == v3) {
             subsampling = SUBSAMPLE_YUV420;
             fourcc = VA_FOURCC_IMC3;
-        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
-                   v1 == 1 && v2 == 1 && v3 == 1) {
+        } else if (h1 == 2 * h2  && h2 == h3 &&
+                   v1 == v2 && v2 == v3) {
             subsampling = SUBSAMPLE_YUV422H;
             fourcc = VA_FOURCC_422H;
-        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
-                   v1 == 1 && v2 == 1 && v3 == 1) {
+        } else if (h1 == h2 && h2 == h3 &&
+                   v1 == v2  && v2 == v3) {
             subsampling = SUBSAMPLE_YUV444;
             fourcc = VA_FOURCC_444P;
-        } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
-                   v1 == 1 && v2 == 1 && v3 == 1) {
+        } else if (h1 == 4 * h2 && h2 ==  h3 &&
+                   v1 == v2 && v2 == v3) {
             subsampling = SUBSAMPLE_YUV411;
             fourcc = VA_FOURCC_411P;
-        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
-                   v1 == 2 && v2 == 1 && v3 == 1) {
-            subsampling = SUBSAMPLE_YUV422V;
-            fourcc = VA_FOURCC_422V;
-        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
-                   v1 == 2 && v2 == 2 && v3 == 2) {
-            subsampling = SUBSAMPLE_YUV422H;
-            fourcc = VA_FOURCC_422H;
-        } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
-                   v1 == 2 && v2 == 1 && v3 == 1) {
+        } else if (h1 == h2 && h2 == h3 &&
+                   v1 == 2 * v2 && v2 == v3) {
             subsampling = SUBSAMPLE_YUV422V;
             fourcc = VA_FOURCC_422V;
         } else
@@ -1906,17 +2387,17 @@ gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
         int v2 = pic_param->components[1].v_sampling_factor;
         int v3 = pic_param->components[2].v_sampling_factor;
 
-        if (h1 == 2 && h2 == 1 && h3 == 1 &&
-            v1 == 2 && v2 == 1 && v3 == 1)
+        if (h1 == 2 * h2 && h2 == h3 &&
+            v1 == 2 * v2 && v2 == v3)
             chroma_type = GEN7_YUV420;
         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                  v1 == 1 && v2 == 1 && v3 == 1)
             chroma_type = GEN7_YUV422H_2Y;
-        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
-                 v1 == 1 && v2 == 1 && v3 == 1)
+        else if (h1 == h2 && h2 == h3 &&
+                 v1 == v2 && v2 == v3)
             chroma_type = GEN7_YUV444;
-        else if (h1 == 4 && h2 == 1 && h3 == 1 &&
-                 v1 == 1 && v2 == 1 && v3 == 1)
+        else if (h1 == 4 * h2 && h2 == h3 &&
+                 v1 == v2 && v2 == v3)
             chroma_type = GEN7_YUV411;
         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                  v1 == 2 && v2 == 1 && v3 == 1)
@@ -2072,9 +2553,9 @@ gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
 
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_data_size);
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   slice_param->slice_data_offset);
     OUT_BCS_BATCH(batch,
                   slice_param->slice_horizontal_position << 16 |
@@ -2091,14 +2572,6 @@ gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
 
 /* Workaround for JPEG decoding on Ivybridge */
 
-VAStatus 
-i965_CreateSurfaces(VADriverContextP ctx,
-                    int width,
-                    int height,
-                    int format,
-                    int num_surfaces,
-                    VASurfaceID *surfaces);
-
 static struct {
     int width;
     int height;
@@ -2179,7 +2652,7 @@ gen7_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                   (0 << 1)  |
                   (0 << 0));
-    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
+    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
     OUT_BCS_BATCH(batch, 0); /* reserved */
     ADVANCE_BCS_BATCH(batch);
 }
@@ -2235,7 +2708,7 @@ gen7_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                   obj_surface->bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
-    
+
     OUT_BCS_BATCH(batch, 0); /* post deblocking */
 
     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
@@ -2315,12 +2788,12 @@ gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
 
     BEGIN_BCS_BATCH(batch, 16);
     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (width_in_mbs * height_in_mbs - 1));
-    OUT_BCS_BATCH(batch, 
-                  ((height_in_mbs - 1) << 16) | 
+    OUT_BCS_BATCH(batch,
+                  ((height_in_mbs - 1) << 16) |
                   ((width_in_mbs - 1) << 0));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (0 << 24) |
                   (0 << 16) |
                   (0 << 14) |
@@ -2448,21 +2921,21 @@ gen7_jpeg_wa_avc_slice_state(VADriverContextP ctx,
     BEGIN_BCS_BATCH(batch, 11);
     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
     OUT_BCS_BATCH(batch, slice_type);
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (num_ref_idx_l1 << 24) |
                   (num_ref_idx_l0 << 16) |
                   (0 << 8) |
                   (0 << 0));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (0 << 29) |
                   (1 << 27) |   /* disable Deblocking */
                   (0 << 24) |
                   (gen7_jpeg_wa_clip.qp << 16) |
                   (0 << 8) |
                   (0 << 0));
-    OUT_BCS_BATCH(batch, 
+    OUT_BCS_BATCH(batch,
                   (slice_ver_pos << 24) |
-                  (slice_hor_pos << 16) | 
+                  (slice_hor_pos << 16) |
                   (first_mb_in_slice << 0));
     OUT_BCS_BATCH(batch,
                   (next_slice_ver_pos << 16) |
@@ -2585,8 +3058,8 @@ gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
 }
 
 static VAStatus
-gen7_mfd_decode_picture(VADriverContextP ctx, 
-                        VAProfile profile, 
+gen7_mfd_decode_picture(VADriverContextP ctx,
+                        VAProfile profile,
                         union codec_state *codec_state,
                         struct hw_context *hw_context)
 
@@ -2609,10 +3082,11 @@ gen7_mfd_decode_picture(VADriverContextP ctx,
     case VAProfileMPEG2Main:
         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
         break;
-        
+
     case VAProfileH264ConstrainedBaseline:
     case VAProfileH264Main:
     case VAProfileH264High:
+    case VAProfileH264StereoHigh:
         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
         break;
 
@@ -2640,8 +3114,11 @@ out:
 static void
 gen7_mfd_context_destroy(void *hw_context)
 {
+    VADriverContextP ctx;
     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
 
+    ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
+
     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
     gen7_mfd_context->post_deblocking_output.bo = NULL;
 
@@ -2665,12 +3142,19 @@ gen7_mfd_context_destroy(void *hw_context)
 
     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
 
+    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
+        i965_DestroySurfaces(ctx,
+                             &gen7_mfd_context->jpeg_wa_surface_id,
+                             1);
+        gen7_mfd_context->jpeg_wa_surface_object = NULL;
+    }
+
     intel_batchbuffer_free(gen7_mfd_context->base.batch);
     free(gen7_mfd_context);
 }
 
 static void gen7_mfd_mpeg2_context_init(VADriverContextP ctx,
-                                    struct gen7_mfd_context *gen7_mfd_context)
+                                        struct gen7_mfd_context *gen7_mfd_context)
 {
     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
@@ -2685,6 +3169,7 @@ gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
     int i;
 
+    assert(gen7_mfd_context);
     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
@@ -2707,10 +3192,13 @@ gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
     case VAProfileH264ConstrainedBaseline:
     case VAProfileH264Main:
     case VAProfileH264High:
+    case VAProfileH264StereoHigh:
         gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
         break;
     default:
         break;
     }
+
+    gen7_mfd_context->driver_context = ctx;
     return (struct hw_context *)gen7_mfd_context;
 }