OSDN Git Service

Set the pipeline to use the new VP8 encoding shaders on BSW
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
index 4997c20..07d0968 100644 (file)
@@ -74,13 +74,17 @@ gen8_mfd_init_avc_surface(VADriverContextP ctx,
 
     if (!gen7_avc_surface) {
         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
+
+        if (!gen7_avc_surface)
+            return;
+
+        gen7_avc_surface->base.frame_store_id = -1;
         assert((obj_surface->size & 0x3f) == 0);
         obj_surface->private_data = gen7_avc_surface;
     }
 
-    gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
-                                         !pic_param->seq_fields.bits.direct_8x8_inference_flag);
-
+    /* DMV buffers now relate to the whole frame, irrespective of
+       field coding modes */
     if (gen7_avc_surface->dmv_top == NULL) {
         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
                                                  "direct mv w/r buffer",
@@ -88,15 +92,6 @@ gen8_mfd_init_avc_surface(VADriverContextP ctx,
                                                  0x1000);
         assert(gen7_avc_surface->dmv_top);
     }
-
-    if (gen7_avc_surface->dmv_bottom_flag &&
-        gen7_avc_surface->dmv_bottom == NULL) {
-        gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
-                                                    "direct mv w/r buffer",
-                                                    width_in_mbs * height_in_mbs * 128,                                                    
-                                                    0x1000);
-        assert(gen7_avc_surface->dmv_bottom);
-    }
 }
 
 static void
@@ -145,12 +140,16 @@ gen8_mfd_surface_state(VADriverContextP ctx,
     struct object_surface *obj_surface = decode_state->render_object;
     unsigned int y_cb_offset;
     unsigned int y_cr_offset;
+    unsigned int surface_format;
 
     assert(obj_surface);
 
     y_cb_offset = obj_surface->y_cb_offset;
     y_cr_offset = obj_surface->y_cr_offset;
 
+    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
     OUT_BCS_BATCH(batch, 0);
@@ -158,7 +157,7 @@ gen8_mfd_surface_state(VADriverContextP ctx,
                   ((obj_surface->orig_height - 1) << 18) |
                   ((obj_surface->orig_width - 1) << 4));
     OUT_BCS_BATCH(batch,
-                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+                  (surface_format << 28) | /* 420 planar YUV surface */
                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                   (0 << 22) | /* surface object control state, ignored */
                   ((obj_surface->width - 1) << 3) | /* pitch */
@@ -180,6 +179,7 @@ gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                              int standard_select,
                              struct gen7_mfd_context *gen7_mfd_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     int i;
 
@@ -187,24 +187,27 @@ gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
        /* Pre-deblock 1-3 */
     if (gen7_mfd_context->pre_deblocking_output.valid)
-        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
+        OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
-    else
+    else {
         OUT_BCS_BATCH(batch, 0);
 
        OUT_BCS_BATCH(batch, 0);
-       OUT_BCS_BATCH(batch, 0);
+    }
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
+
        /* Post-debloing 4-6 */
     if (gen7_mfd_context->post_deblocking_output.valid)
-        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
+        OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
-    else
+    else {
         OUT_BCS_BATCH(batch, 0);
 
        OUT_BCS_BATCH(batch, 0);
-       OUT_BCS_BATCH(batch, 0);
+    }
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
        /* uncompressed-video & stream out 7-12 */
     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
@@ -216,23 +219,27 @@ gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
 
        /* intra row-store scratch 13-15 */
     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
-        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
+        OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
-    else
+    else {
         OUT_BCS_BATCH(batch, 0);
 
        OUT_BCS_BATCH(batch, 0);
-       OUT_BCS_BATCH(batch, 0);
+    }
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
+
        /* deblocking-filter-row-store 16-18 */
     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
-        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
+        OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
-    else
+    else {
         OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
-       OUT_BCS_BATCH(batch, 0);
+    }
+
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
     /* DW 19..50 */
     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
@@ -243,18 +250,18 @@ gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
 
-            OUT_BCS_RELOC(batch, obj_surface->bo,
+            OUT_BCS_RELOC64(batch, obj_surface->bo,
                           I915_GEM_DOMAIN_INSTRUCTION, 0,
                           0);
         } else {
             OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
         }
         
-        OUT_BCS_BATCH(batch, 0);
     }
     
     /* reference property 51 */
-    OUT_BCS_BATCH(batch, 0);  
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
        
     /* Macroblock status & ILDB 52-57 */
     OUT_BCS_BATCH(batch, 0);
@@ -279,15 +286,15 @@ gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
 
     BEGIN_BCS_BATCH(batch, 26);
     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
        /* MFX In BS 1-5 */
-    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
        /* Upper bound 4-5 */   
-    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+    OUT_BCS_BATCH(batch, 0);
     OUT_BCS_BATCH(batch, 0);
 
        /* MFX indirect MV 6-10 */
@@ -328,39 +335,43 @@ gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
 
     BEGIN_BCS_BATCH(batch, 10);
     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
 
     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
-        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
+        OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
-       else
+       else {
                OUT_BCS_BATCH(batch, 0);
+               OUT_BCS_BATCH(batch, 0);
+    }
                
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
        /* MPR Row Store Scratch buffer 4-6 */
     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
-        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
+        OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
-    else
+    else {
+        OUT_BCS_BATCH(batch, 0);
         OUT_BCS_BATCH(batch, 0);
+    }
 
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
        /* Bitplane 7-9 */ 
     if (gen7_mfd_context->bitplane_read_buffer.valid)
-        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
+        OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                       I915_GEM_DOMAIN_INSTRUCTION, 0,
                       0);
-    else
+    else {
        OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+       OUT_BCS_BATCH(batch, 0);
+    }
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
     ADVANCE_BCS_BATCH(batch);
 }
 
@@ -433,7 +444,7 @@ gen8_mfd_avc_img_state(VADriverContextP ctx,
     BEGIN_BCS_BATCH(batch, 17);
     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
     OUT_BCS_BATCH(batch, 
-                  width_in_mbs * height_in_mbs);
+                  (width_in_mbs * height_in_mbs - 1));
     OUT_BCS_BATCH(batch, 
                   ((height_in_mbs - 1) << 16) | 
                   ((width_in_mbs - 1) << 0));
@@ -495,25 +506,13 @@ gen8_mfd_avc_qm_state(VADriverContextP ctx,
     }
 }
 
-static void
+static inline void /* wrapper around gen75_send_avc_picid_state(); ctx and decode_state are not used in the new body */
 gen8_mfd_avc_picid_state(VADriverContextP ctx,
-                      struct decode_state *decode_state,
-                      struct gen7_mfd_context *gen7_mfd_context)
+    struct decode_state *decode_state,
+    struct gen7_mfd_context *gen7_mfd_context)
 {
-    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-
-    BEGIN_BCS_BATCH(batch, 10);
-    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
-    OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    ADVANCE_BCS_BATCH(batch);
+    gen75_send_avc_picid_state(gen7_mfd_context->base.batch, /* the same batch the removed inline code wrote to */
+        gen7_mfd_context->reference_surface); /* replaces the removed fixed 10-dword command whose DW1 was 1 ("disable Picture ID Remapping"); NOTE(review): helper is defined outside this diff - confirm what it emits */
 }
 
 static void
@@ -523,11 +522,12 @@ gen8_mfd_avc_directmode_state(VADriverContextP ctx,
                               VASliceParameterBufferH264 *slice_param,
                               struct gen7_mfd_context *gen7_mfd_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     struct object_surface *obj_surface;
     GenAvcSurface *gen7_avc_surface;
     VAPictureH264 *va_pic;
-    int i, j;
+    int i;
 
     BEGIN_BCS_BATCH(batch, 71);
     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
@@ -541,17 +541,16 @@ gen8_mfd_avc_directmode_state(VADriverContextP ctx,
             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
             gen7_avc_surface = obj_surface->private_data;
 
-            OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+            OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                           I915_GEM_DOMAIN_INSTRUCTION, 0,
                           0);
-            OUT_BCS_BATCH(batch, 0);
         } else {
             OUT_BCS_BATCH(batch, 0);
             OUT_BCS_BATCH(batch, 0);
         }
     }
     
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
     /* the current decoding frame/field */
     va_pic = &pic_param->CurrPic;
@@ -559,35 +558,22 @@ gen8_mfd_avc_directmode_state(VADriverContextP ctx,
     assert(obj_surface->bo && obj_surface->private_data);
     gen7_avc_surface = obj_surface->private_data;
 
-    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+    OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
 
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
     /* POC List */
     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
-        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
-            int found = 0;
-
-            assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
+        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
 
-            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
-                va_pic = &pic_param->ReferenceFrames[j];
-                
-                if (va_pic->flags & VA_PICTURE_H264_INVALID)
-                    continue;
+        if (obj_surface) {
+            const VAPictureH264 * const va_pic = avc_find_picture(
+                obj_surface->base.id, pic_param->ReferenceFrames,
+                ARRAY_ELEMS(pic_param->ReferenceFrames));
 
-                if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
-                    found = 1;
-                    break;
-                }
-            }
-
-            assert(found == 1);
-            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
-            
+            assert(va_pic != NULL);
             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
         } else {
@@ -604,6 +590,15 @@ gen8_mfd_avc_directmode_state(VADriverContextP ctx,
 }
 
 static void /* forwards to gen6_mfd_avc_phantom_slice() using gen7_mfd_context->base.batch as the batch buffer */
+gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
+                                 VAPictureParameterBufferH264 *pic_param,
+                                 VASliceParameterBufferH264 *next_slice_param, /* the caller in gen8_mfd_avc_decode_picture passes the first slice of the picture here */
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch); /* invoked only when j == 0 and that slice's first_mb_in_slice is non-zero; NOTE(review): helper is defined outside this diff - confirm its behaviour */
+}
+
+static void
 gen8_mfd_avc_slice_state(VADriverContextP ctx,
                          VAPictureParameterBufferH264 *pic_param,
                          VASliceParameterBufferH264 *slice_param,
@@ -645,14 +640,19 @@ gen8_mfd_avc_slice_state(VADriverContextP ctx,
         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
     }
 
-    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
+    first_mb_in_slice = slice_param->first_mb_in_slice;
     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
     slice_ver_pos = first_mb_in_slice / width_in_mbs;
 
+    if (mbaff_picture)
+        slice_ver_pos = slice_ver_pos << 1;
     if (next_slice_param) {
-        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
+        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
+
+        if (mbaff_picture)
+            next_slice_ver_pos = next_slice_ver_pos << 1;
     } else {
         next_slice_hor_pos = 0;
         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
@@ -835,7 +835,8 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx,
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
-    intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
+    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
+        gen7_mfd_context->reference_surface);
     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
@@ -843,20 +844,12 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx,
 
     /* Current decoded picture */
     obj_surface = decode_state->render_object;
-    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
-    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
-
-    /* initial uv component for YUV400 case */
-    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
-         unsigned int uv_offset = obj_surface->width * obj_surface->height; 
-         unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 
-
-         drm_intel_gem_bo_map_gtt(obj_surface->bo);
-         memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
-         drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
-    }
+    if (pic_param->pic_fields.bits.reference_pic_flag)
+        obj_surface->flags |= SURFACE_REFERENCED;
+    else
+        obj_surface->flags &= ~SURFACE_REFERENCED;
 
+    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
 
     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
@@ -930,8 +923,8 @@ gen8_mfd_avc_decode_picture(VADriverContextP ctx,
     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
-    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
+    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
 
     for (j = 0; j < decode_state->num_slice_params; j++) {
         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
@@ -944,6 +937,9 @@ gen8_mfd_avc_decode_picture(VADriverContextP ctx,
         else
             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
 
+        if (j == 0 && slice_param->first_mb_in_slice)
+            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); 
+
         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
             assert((slice_param->slice_type == SLICE_TYPE_I) ||
@@ -994,7 +990,7 @@ gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
 
     /* Current decoded picture */
     obj_surface = decode_state->render_object;
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
 
     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
@@ -1292,6 +1288,10 @@ gen8_mfd_init_vc1_surface(VADriverContextP ctx,
 
     if (!gen7_vc1_surface) {
         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
+
+        if (!gen7_vc1_surface)
+            return;
+
         assert((obj_surface->size & 0x3f) == 0);
         obj_surface->private_data = gen7_vc1_surface;
     }
@@ -1330,7 +1330,7 @@ gen8_mfd_vc1_decode_init(VADriverContextP ctx,
 
     /* Current decoded picture */
     obj_surface = decode_state->render_object;
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
 
     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
@@ -1586,25 +1586,29 @@ gen8_mfd_vc1_pic_state(VADriverContextP ctx,
             brfd = 0;
     }
 
-    overlap = 0;
-    if (profile != GEN7_VC1_ADVANCED_PROFILE){
-        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
-            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
-            overlap = 1; 
-        }
-    }else {
-        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
-             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
-              overlap = 1; 
-        }
-        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
-            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
-             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
-                overlap = 1; 
-             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
-                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
-                 overlap = 1;
-             }
+    overlap = pic_param->sequence_fields.bits.overlap;
+
+    if (overlap) {
+        overlap = 0;
+        if (profile != GEN7_VC1_ADVANCED_PROFILE){
+            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
+                pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
+                overlap = 1;
+            }
+        }else {
+            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
+                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
+                overlap = 1;
+            }
+            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
+                pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
+                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
+                    overlap = 1;
+                } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
+                           va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
+                    overlap = 1;
+                }
+            }
         }
     } 
 
@@ -1694,9 +1698,6 @@ gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
-
-    assert(decode_state->pic_param && decode_state->pic_param->buffer);
-    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
 
     BEGIN_BCS_BATCH(batch, 6);
@@ -1722,6 +1723,7 @@ gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     struct object_surface *obj_surface;
     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
@@ -1742,24 +1744,26 @@ gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
 
     if (dmv_write_buffer)
-        OUT_BCS_RELOC(batch, dmv_write_buffer,
+        OUT_BCS_RELOC64(batch, dmv_write_buffer,
                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
-    else
+    else {
         OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    }
 
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
     if (dmv_read_buffer)
-        OUT_BCS_RELOC(batch, dmv_read_buffer,
+        OUT_BCS_RELOC64(batch, dmv_read_buffer,
                       I915_GEM_DOMAIN_INSTRUCTION, 0,
                       0);
-    else
+    else {
         OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    }
     
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
                   
     ADVANCE_BCS_BATCH(batch);
 }
@@ -1886,12 +1890,14 @@ gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
     struct object_surface *obj_surface;
     VAPictureParameterBufferJPEGBaseline *pic_param;
     int subsampling = SUBSAMPLE_YUV420;
+    int fourcc = VA_FOURCC_IMC3;
 
     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
 
-    if (pic_param->num_components == 1)
+    if (pic_param->num_components == 1) {
         subsampling = SUBSAMPLE_YUV400;
-    else if (pic_param->num_components == 3) {
+        fourcc = VA_FOURCC_Y800;
+    } else if (pic_param->num_components == 3) {
         int h1 = pic_param->components[0].h_sampling_factor;
         int h2 = pic_param->components[1].h_sampling_factor;
         int h3 = pic_param->components[2].h_sampling_factor;
@@ -1900,35 +1906,43 @@ gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
         int v3 = pic_param->components[2].v_sampling_factor;
 
         if (h1 == 2 && h2 == 1 && h3 == 1 &&
-            v1 == 2 && v2 == 1 && v3 == 1)
+            v1 == 2 && v2 == 1 && v3 == 1) {
             subsampling = SUBSAMPLE_YUV420;
-        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
-                 v1 == 1 && v2 == 1 && v3 == 1)
+            fourcc = VA_FOURCC_IMC3;
+        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                   v1 == 1 && v2 == 1 && v3 == 1) {
             subsampling = SUBSAMPLE_YUV422H;
-        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
-                 v1 == 1 && v2 == 1 && v3 == 1)
+            fourcc = VA_FOURCC_422H;
+        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                   v1 == 1 && v2 == 1 && v3 == 1) {
             subsampling = SUBSAMPLE_YUV444;
-        else if (h1 == 4 && h2 == 1 && h3 == 1 &&
-                 v1 == 1 && v2 == 1 && v3 == 1)
+            fourcc = VA_FOURCC_444P;
+        } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
+                   v1 == 1 && v2 == 1 && v3 == 1) {
             subsampling = SUBSAMPLE_YUV411;
-        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
-                 v1 == 2 && v2 == 1 && v3 == 1)
+            fourcc = VA_FOURCC_411P;
+        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                   v1 == 2 && v2 == 1 && v3 == 1) {
             subsampling = SUBSAMPLE_YUV422V;
-        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
-                 v1 == 2 && v2 == 2 && v3 == 2)
+            fourcc = VA_FOURCC_422V;
+        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                   v1 == 2 && v2 == 2 && v3 == 2) {
             subsampling = SUBSAMPLE_YUV422H;
-        else if (h2 == 2 && h2 == 2 && h3 == 2 &&
-                 v1 == 2 && v2 == 1 && v3 == 1)
+            fourcc = VA_FOURCC_422H;
+        } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
+                   v1 == 2 && v2 == 1 && v3 == 1) {
             subsampling = SUBSAMPLE_YUV422V;
-        else
+            fourcc = VA_FOURCC_422V;
+        } else
             assert(0);
-    } else {
+    }
+    else {
         assert(0);
     }
 
     /* Current decoded picture */
     obj_surface = decode_state->render_object;
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
 
     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
@@ -2171,18 +2185,6 @@ gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
 /* Workaround for JPEG decoding on Ivybridge */
 #ifdef JPEG_WA
 
-VAStatus 
-i965_DestroySurfaces(VADriverContextP ctx,
-                     VASurfaceID *surface_list,
-                     int num_surfaces);
-VAStatus 
-i965_CreateSurfaces(VADriverContextP ctx,
-                    int width,
-                    int height,
-                    int format,
-                    int num_surfaces,
-                    VASurfaceID *surfaces);
-
 static struct {
     int width;
     int height;
@@ -2225,7 +2227,7 @@ gen8_jpeg_wa_init(VADriverContextP ctx,
 
     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
     assert(obj_surface);
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
 
     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
@@ -2315,12 +2317,11 @@ gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
 
     BEGIN_BCS_BATCH(batch, 61);
     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
-    OUT_BCS_RELOC(batch,
+    OUT_BCS_RELOC64(batch,
                   obj_surface->bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
-       OUT_BCS_BATCH(batch, 0);
-       OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
     
 
     OUT_BCS_BATCH(batch, 0); /* post deblocking */
@@ -2336,12 +2337,12 @@ gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
        OUT_BCS_BATCH(batch, 0);
 
        /* the DW 13-15 is for intra row store scratch */
-    OUT_BCS_RELOC(batch,
+    OUT_BCS_RELOC64(batch,
                   intra_bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
-       OUT_BCS_BATCH(batch, 0);
-       OUT_BCS_BATCH(batch, 0);
+
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
        /* the DW 16-18 is for deblocking filter */ 
     OUT_BCS_BATCH(batch, 0);
@@ -2393,20 +2394,18 @@ gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
     BEGIN_BCS_BATCH(batch, 10);
     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
 
-    OUT_BCS_RELOC(batch,
+    OUT_BCS_RELOC64(batch,
                   bsd_mpc_bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
 
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
-    OUT_BCS_RELOC(batch,
+    OUT_BCS_RELOC64(batch,
                   mpr_bo,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                   0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
 
     OUT_BCS_BATCH(batch, 0);
     OUT_BCS_BATCH(batch, 0);
@@ -2516,12 +2515,11 @@ gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
 
     BEGIN_BCS_BATCH(batch, 11);
     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
-    OUT_BCS_RELOC(batch,
+    OUT_BCS_RELOC64(batch,
                   gen7_mfd_context->jpeg_wa_slice_data_bo,
                   I915_GEM_DOMAIN_INSTRUCTION, 0,
                   0);
-    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
-    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
     OUT_BCS_BATCH(batch, 0);
     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
     OUT_BCS_BATCH(batch, 0);
@@ -2712,6 +2710,40 @@ gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
     intel_batchbuffer_flush(batch);
 }
 
+static const int vp8_dc_qlookup[128] = /* DC quantizer values, one entry per quantization index 0..127 */
+{ /* gen8_mfd_vp8_pic_state reads this with a clipped index for the ydc, y2dc (doubled) and uvdc (capped at 132) values */
+      4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
+     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
+     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
+     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
+     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
+    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
+};
+
+static const int vp8_ac_qlookup[128] = /* AC quantizer values, one entry per quantization index 0..127 */
+{ /* gen8_mfd_vp8_pic_state reads this with a clipped index for the yac, y2ac (scaled by 101581 >> 16, minimum 8) and uvac values */
+      4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
+     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
+     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
+     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
+     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
+    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
+    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
+    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
+};
+
+static inline unsigned int vp8_clip_quantization_index(int index) /* clamp index into 0..127, the valid range of the 128-entry qlookup tables */
+{
+    if(index > 127)
+        return 127; /* above the last table entry: saturate to the top index */
+    else if(index <0)
+        return 0; /* negative index: saturate to the first entry */
+
+    return index; /* already in range; returned unchanged */
+}
+
 static void
 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
@@ -2727,18 +2759,27 @@ gen8_mfd_vp8_decode_init(VADriverContextP ctx,
     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
     assert(height_in_mbs > 0 && height_in_mbs <= 256);
 
+    intel_update_vp8_frame_store_index(ctx,
+                                       decode_state,
+                                       pic_param,
+                                       gen7_mfd_context->reference_surface);
+
     /* Current decoded picture */
     obj_surface = decode_state->render_object;
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
 
     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
-    gen7_mfd_context->post_deblocking_output.bo = NULL;
-    gen7_mfd_context->post_deblocking_output.valid = 0;
+    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
 
     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
-    gen7_mfd_context->pre_deblocking_output.valid = 1;
+    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
+
+    intel_ensure_vp8_segmentation_buffer(ctx,
+        &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
 
     /* The same as AVC */
     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
@@ -2785,13 +2826,22 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen7_mfd_context *gen7_mfd_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
     dri_bo *probs_bo = decode_state->probability_data->bo;
     int i, j,log2num;
-
+    unsigned int quantization_value[4][6];
+
+    /* There is no safe way to error out if the segmentation buffer
+       could not be allocated. So, instead of aborting, simply decode
+       something even if the result may look totally inaccurate */
+    const unsigned int enable_segmentation =
+        pic_param->pic_fields.bits.segmentation_enabled &&
+        gen7_mfd_context->segmentation_buffer.valid;
+        
     log2num = (int)log2(slice_param->num_of_partitions - 1);
 
     BEGIN_BCS_BATCH(batch, 38);
@@ -2806,11 +2856,14 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx,
                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
-                  pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
+                  (enable_segmentation &&
+                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
-                  0 << 7 | /* segmentation id streamin disabled */
-                  0 << 6 | /* segmentation id streamout disabled */
-                  pic_param->pic_fields.bits.key_frame << 5 |
+                  (enable_segmentation &&
+                   !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
+                  (enable_segmentation &&
+                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
+                  (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicates an intra frame in the VP8 stream/spec ($9.1) */
                   pic_param->pic_fields.bits.filter_type << 4 |
                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
@@ -2823,24 +2876,34 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx,
 
     /* Quantizer Value for 4 segmetns, DW4-DW15 */
     for (i = 0; i < 4; i++) {
+               quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
+               quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
+               quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
+               /* 101581>>16 is equivalent to 155/100 */
+               quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
+               quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
+               quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
+
+               quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
+               quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
+
+               OUT_BCS_BATCH(batch,
+                      quantization_value[i][0] << 16 | /* Y1AC */
+                      quantization_value[i][1] <<  0); /* Y1DC */
         OUT_BCS_BATCH(batch,
-                      iq_matrix->quantization_index[i][0] << 16 | /* Y1AC */
-                      iq_matrix->quantization_index[i][1] <<  0); /* Y1DC */
-        OUT_BCS_BATCH(batch,
-                      iq_matrix->quantization_index[i][5] << 16 | /* UVAC */
-                      iq_matrix->quantization_index[i][4] <<  0); /* UVDC */
+                      quantization_value[i][5] << 16 | /* UVAC */
+                      quantization_value[i][4] <<  0); /* UVDC */
         OUT_BCS_BATCH(batch,
-                      iq_matrix->quantization_index[i][3] << 16 | /* Y2AC */
-                      iq_matrix->quantization_index[i][2] <<  0); /* Y2DC */
+                      quantization_value[i][3] << 16 | /* Y2AC */
+                      quantization_value[i][2] <<  0); /* Y2DC */
     }
 
     /* CoeffProbability table for non-key frame, DW16-DW18 */
     if (probs_bo) {
-        OUT_BCS_RELOC(batch, probs_bo,
+        OUT_BCS_RELOC64(batch, probs_bo,
                       0, I915_GEM_DOMAIN_INSTRUCTION,
                       0);
-        OUT_BCS_BATCH(batch, 0);
-        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
     } else {
         OUT_BCS_BATCH(batch, 0);
         OUT_BCS_BATCH(batch, 0);
@@ -2881,21 +2944,29 @@ gen8_mfd_vp8_pic_state(VADriverContextP ctx,
     }
 
     OUT_BCS_BATCH(batch,
-                  pic_param->loop_filter_deltas_ref_frame[3] << 24 |
-                  pic_param->loop_filter_deltas_ref_frame[2] << 16 |
-                  pic_param->loop_filter_deltas_ref_frame[1] <<  8 |
-                  pic_param->loop_filter_deltas_ref_frame[0] <<  0);
+                  (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
+                  (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
+                  (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
+                  (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
 
     OUT_BCS_BATCH(batch,
-                  pic_param->loop_filter_deltas_mode[3] << 24 |
-                  pic_param->loop_filter_deltas_mode[2] << 16 |
-                  pic_param->loop_filter_deltas_mode[1] <<  8 |
-                  pic_param->loop_filter_deltas_mode[0] <<  0);
+                  (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
+                  (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
+                  (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
+                  (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
 
     /* segmentation id stream base address, DW35-DW37 */
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
+    if (enable_segmentation) {
+        OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
+                      0, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
+    }
+    else {
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    }
     ADVANCE_BCS_BATCH(batch);
 }
 
@@ -2908,7 +2979,16 @@ gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     int i, log2num;
-    unsigned int offset = slice_param->slice_data_offset;
+    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
+    unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
+    unsigned int partition_size_0 = slice_param->partition_size[0];
+
+    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
+    if (used_bits == 8) {
+        used_bits = 0;
+        offset += 1;
+        partition_size_0 -= 1;
+    }
 
     assert(slice_param->num_of_partitions >= 2);
     assert(slice_param->num_of_partitions <= 9);
@@ -2918,7 +2998,7 @@ gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
     BEGIN_BCS_BATCH(batch, 22);
     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
     OUT_BCS_BATCH(batch,
-                  pic_param->bool_coder_ctx.count << 16 | /* Partition 0 CPBAC Entropy Count */
+                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
                   log2num << 4 |
                   (slice_param->macroblock_offset & 0x7));
@@ -2926,9 +3006,13 @@ gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
                   0);
 
-    for (i = 0; i < 9; i++) {
+    OUT_BCS_BATCH(batch, partition_size_0 + 1);
+    OUT_BCS_BATCH(batch, offset);
+    //partition sizes in bytes are present after the above first partition when there is more than one token partition
+    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
+    for (i = 1; i < 9; i++) {
         if (i < slice_param->num_of_partitions) {
-            OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
+            OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
             OUT_BCS_BATCH(batch, offset);
         } else {
             OUT_BCS_BATCH(batch, 0);
@@ -2938,9 +3022,7 @@ gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
         offset += slice_param->partition_size[i];
     }
 
-    OUT_BCS_BATCH(batch,
-                  1 << 31 | /* concealment method */
-                  0);
+    OUT_BCS_BATCH(batch, 0); /* concealment method */
 
     ADVANCE_BCS_BATCH(batch);
 }
@@ -2959,12 +3041,18 @@ gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
 
     /* one slice per frame */
-    assert(decode_state->num_slice_params == 1);
-    assert(decode_state->slice_params[0]->num_elements == 1);
-    assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
-    assert(decode_state->slice_datas[0]->bo);
+    if (decode_state->num_slice_params != 1 ||
+        (!decode_state->slice_params ||
+         !decode_state->slice_params[0] ||
+         (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
+        (!decode_state->slice_datas ||
+         !decode_state->slice_datas[0] ||
+         !decode_state->slice_datas[0]->bo) ||
+        !decode_state->probability_data) {
+        WARN_ONCE("Wrong parameters for VP8 decoding\n");
 
-    assert(decode_state->probability_data);
+        return;
+    }
 
     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
     slice_data_bo = decode_state->slice_datas[0]->bo;
@@ -3009,9 +3097,11 @@ gen8_mfd_decode_picture(VADriverContextP ctx,
         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
         break;
         
-    case VAProfileH264Baseline:
+    case VAProfileH264ConstrainedBaseline:
     case VAProfileH264Main:
     case VAProfileH264High:
+    case VAProfileH264StereoHigh:
+    case VAProfileH264MultiviewHigh:
         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
         break;
 
@@ -3043,8 +3133,11 @@ out:
 static void
 gen8_mfd_context_destroy(void *hw_context)
 {
+    VADriverContextP ctx;
     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
 
+    ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
+
     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
     gen7_mfd_context->post_deblocking_output.bo = NULL;
 
@@ -3066,8 +3159,18 @@ gen8_mfd_context_destroy(void *hw_context)
     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
 
+    dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
+    gen7_mfd_context->segmentation_buffer.bo = NULL;
+
     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
 
+    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
+        i965_DestroySurfaces(ctx,
+                             &gen7_mfd_context->jpeg_wa_surface_id,
+                             1);
+        gen7_mfd_context->jpeg_wa_surface_object = NULL;
+    }
+
     intel_batchbuffer_free(gen7_mfd_context->base.batch);
     free(gen7_mfd_context);
 }
@@ -3088,6 +3191,9 @@ gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
     int i;
 
+    if (!gen7_mfd_context)
+        return NULL;
+
     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
@@ -3098,6 +3204,7 @@ gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
     }
 
     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
+    gen7_mfd_context->segmentation_buffer.valid = 0;
 
     switch (obj_config->profile) {
     case VAProfileMPEG2Simple:
@@ -3105,13 +3212,17 @@ gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
         break;
 
-    case VAProfileH264Baseline:
+    case VAProfileH264ConstrainedBaseline:
     case VAProfileH264Main:
     case VAProfileH264High:
+    case VAProfileH264StereoHigh:
+    case VAProfileH264MultiviewHigh:
         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
         break;
     default:
         break;
     }
+
+    gen7_mfd_context->driver_context = ctx;
     return (struct hw_context *)gen7_mfd_context;
 }