OSDN Git Service

Fix a typo
[android-x86/hardware-intel-common-vaapi.git] / src / i965_post_processing.c
old mode 100755 (executable)
new mode 100644 (file)
index 491234b..cd6b2c0
 #include "i965_drv_video.h"
 #include "i965_post_processing.h"
 #include "i965_render.h"
+#include "i965_yuv_coefs.h"
 #include "intel_media.h"
+#include "intel_gen_vppapi.h"
+
+#include "gen75_picture_process.h"
 
 extern VAStatus
 vpp_surface_convert(VADriverContextP ctx,
@@ -48,7 +52,7 @@ vpp_surface_convert(VADriverContextP ctx,
 #define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp)
 
 #define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
-                       MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))
+            MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))
 
 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
 #define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
@@ -131,10 +135,10 @@ static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_proces
                                    void *filter_param);
 static VAStatus
 pp_nv12_avs_initialize(VADriverContextP ctx,
-    struct i965_post_processing_context *pp_context,
-    const struct i965_surface *src_surface, const VARectangle *src_rect,
-    struct i965_surface *dst_surface, const VARectangle *dst_rect,
-    void *filter_param);
+                       struct i965_post_processing_context *pp_context,
+                       const struct i965_surface *src_surface, const VARectangle *src_rect,
+                       struct i965_surface *dst_surface, const VARectangle *dst_rect,
+                       void *filter_param);
 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                            const struct i965_surface *src_surface,
                                            const VARectangle *src_rect,
@@ -283,7 +287,7 @@ static struct pp_module pp_modules_gen5[] = {
             sizeof(pp_nv12_load_save_pa_gen5),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 
@@ -295,7 +299,7 @@ static struct pp_module pp_modules_gen5[] = {
             sizeof(pp_pl3_load_save_pa_gen5),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 
@@ -307,7 +311,7 @@ static struct pp_module pp_modules_gen5[] = {
             sizeof(pp_pa_load_save_nv12_gen5),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 
@@ -319,7 +323,7 @@ static struct pp_module pp_modules_gen5[] = {
             sizeof(pp_pa_load_save_pl3_gen5),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 
@@ -343,10 +347,10 @@ static struct pp_module pp_modules_gen5[] = {
             sizeof(pp_rgbx_load_save_nv12_gen5),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
-            
+
     {
         {
             "NV12_RGBX module",
@@ -355,7 +359,7 @@ static struct pp_module pp_modules_gen5[] = {
             sizeof(pp_nv12_load_save_rgbx_gen5),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 };
@@ -457,7 +461,7 @@ static struct pp_module pp_modules_gen6[] = {
             sizeof(pp_nv12_load_save_pl3_gen6),
             NULL,
         },
-        
+
         pp_plx_load_save_plx_initialize,
     },
 
@@ -540,7 +544,7 @@ static struct pp_module pp_modules_gen6[] = {
             sizeof(pp_nv12_load_save_pa_gen6),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 
@@ -552,7 +556,7 @@ static struct pp_module pp_modules_gen6[] = {
             sizeof(pp_pl3_load_save_pa_gen6),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 
@@ -564,7 +568,7 @@ static struct pp_module pp_modules_gen6[] = {
             sizeof(pp_pa_load_save_nv12_gen6),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 
@@ -576,7 +580,7 @@ static struct pp_module pp_modules_gen6[] = {
             sizeof(pp_pa_load_save_pl3_gen6),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 
@@ -600,7 +604,7 @@ static struct pp_module pp_modules_gen6[] = {
             sizeof(pp_rgbx_load_save_nv12_gen6),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 
@@ -612,7 +616,7 @@ static struct pp_module pp_modules_gen6[] = {
             sizeof(pp_nv12_load_save_rgbx_gen6),
             NULL,
         },
-    
+
         pp_plx_load_save_plx_initialize,
     },
 };
@@ -725,7 +729,7 @@ static struct pp_module pp_modules_gen7[] = {
             sizeof(pp_nv12_load_save_pl3_gen7),
             NULL,
         },
-        
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -808,7 +812,7 @@ static struct pp_module pp_modules_gen7[] = {
             sizeof(pp_nv12_load_save_pa_gen7),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -820,7 +824,7 @@ static struct pp_module pp_modules_gen7[] = {
             sizeof(pp_pl3_load_save_pa_gen7),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -832,7 +836,7 @@ static struct pp_module pp_modules_gen7[] = {
             sizeof(pp_pa_load_save_nv12_gen7),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -844,7 +848,7 @@ static struct pp_module pp_modules_gen7[] = {
             sizeof(pp_pa_load_save_pl3_gen7),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -868,7 +872,7 @@ static struct pp_module pp_modules_gen7[] = {
             sizeof(pp_rgbx_load_save_nv12_gen7),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -880,10 +884,10 @@ static struct pp_module pp_modules_gen7[] = {
             sizeof(pp_nv12_load_save_rgbx_gen7),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
-            
+
 };
 
 static const uint32_t pp_null_gen75[][4] = {
@@ -975,7 +979,7 @@ static struct pp_module pp_modules_gen75[] = {
             sizeof(pp_nv12_load_save_pl3_gen75),
             NULL,
         },
-        
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -1059,7 +1063,7 @@ static struct pp_module pp_modules_gen75[] = {
             sizeof(pp_nv12_load_save_pa_gen75),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -1071,7 +1075,7 @@ static struct pp_module pp_modules_gen75[] = {
             sizeof(pp_pl3_load_save_pa_gen75),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -1083,7 +1087,7 @@ static struct pp_module pp_modules_gen75[] = {
             sizeof(pp_pa_load_save_nv12_gen75),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -1095,7 +1099,7 @@ static struct pp_module pp_modules_gen75[] = {
             sizeof(pp_pa_load_save_pl3_gen75),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -1119,7 +1123,7 @@ static struct pp_module pp_modules_gen75[] = {
             sizeof(pp_rgbx_load_save_nv12_gen75),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
 
@@ -1131,10 +1135,10 @@ static struct pp_module pp_modules_gen75[] = {
             sizeof(pp_nv12_load_save_rgbx_gen75),
             NULL,
         },
-    
+
         gen7_pp_plx_avs_initialize,
     },
-            
+
 };
 
 static void
@@ -1175,9 +1179,9 @@ pp_dndi_context_init(struct pp_dndi_context *dndi_ctx)
 
 static VAStatus
 pp_dndi_context_init_surface_params(struct pp_dndi_context *dndi_ctx,
-    struct object_surface *obj_surface,
-    const VAProcPipelineParameterBuffer *pipe_params,
-    const VAProcFilterParameterBufferDeinterlacing *deint_params)
+                                    struct object_surface *obj_surface,
+                                    const VAProcPipelineParameterBuffer *pipe_params,
+                                    const VAProcFilterParameterBufferDeinterlacing *deint_params)
 {
     DNDIFrameStore *fs;
 
@@ -1222,8 +1226,7 @@ pp_dndi_context_init_surface_params(struct pp_dndi_context *dndi_ctx,
                 fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
                 if (fs->surface_id == VA_INVALID_ID)
                     dndi_ctx->is_first_frame = 1;
-            }
-            else {
+            } else {
                 if (pipe_params->num_forward_references < 1 ||
                     pipe_params->forward_references[0] == VA_INVALID_ID) {
                     WARN_ONCE("A forward temporal reference is needed for Motion adaptive/compensated deinterlacing !!!\n");
@@ -1243,8 +1246,8 @@ pp_dndi_context_init_surface_params(struct pp_dndi_context *dndi_ctx,
 
 static VAStatus
 pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
-    struct i965_post_processing_context *pp_context,
-    struct object_surface *src_surface, struct object_surface *dst_surface)
+                                        struct i965_post_processing_context *pp_context,
+                                        struct object_surface *src_surface, struct object_surface *dst_surface)
 {
     struct i965_driver_data * const i965 = i965_driver_data(ctx);
     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
@@ -1260,13 +1263,12 @@ pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
         src_sampling = src_surface->subsampling;
         dri_bo_get_tiling(src_surface->bo, &src_tiling, &swizzle);
         src_tiling = !!src_tiling;
-    }
-    else {
+    } else {
         src_fourcc = VA_FOURCC_NV12;
         src_sampling = SUBSAMPLE_YUV420;
         src_tiling = 1;
         status = i965_check_alloc_surface_bo(ctx, src_surface,
-            src_tiling, src_fourcc, src_sampling);
+                                             src_tiling, src_fourcc, src_sampling);
         if (status != VA_STATUS_SUCCESS)
             return status;
     }
@@ -1277,13 +1279,12 @@ pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
         dst_sampling = dst_surface->subsampling;
         dri_bo_get_tiling(dst_surface->bo, &dst_tiling, &swizzle);
         dst_tiling = !!dst_tiling;
-    }
-    else {
+    } else {
         dst_fourcc = VA_FOURCC_NV12;
         dst_sampling = SUBSAMPLE_YUV420;
         dst_tiling = 1;
         status = i965_check_alloc_surface_bo(ctx, dst_surface,
-            dst_tiling, dst_fourcc, dst_sampling);
+                                             dst_tiling, dst_fourcc, dst_sampling);
         if (status != VA_STATUS_SUCCESS)
             return status;
     }
@@ -1305,8 +1306,7 @@ pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
             if (i <= DNDI_FRAME_IN_STMM) {
                 width = src_surface->orig_width;
                 height = src_surface->orig_height;
-            }
-            else {
+            } else {
                 width = dst_surface->orig_width;
                 height = dst_surface->orig_height;
             }
@@ -1323,15 +1323,13 @@ pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
 
         if (i <= DNDI_FRAME_IN_PREVIOUS) {
             status = i965_check_alloc_surface_bo(ctx, obj_surface,
-                src_tiling, src_fourcc, src_sampling);
-        }
-        else if (i == DNDI_FRAME_IN_STMM || i == DNDI_FRAME_OUT_STMM) {
+                                                 src_tiling, src_fourcc, src_sampling);
+        } else if (i == DNDI_FRAME_IN_STMM || i == DNDI_FRAME_OUT_STMM) {
             status = i965_check_alloc_surface_bo(ctx, obj_surface,
-                1, VA_FOURCC_Y800, SUBSAMPLE_YUV400);
-        }
-        else if (i >= DNDI_FRAME_OUT_CURRENT) {
+                                                 1, VA_FOURCC_Y800, SUBSAMPLE_YUV400);
+        } else if (i >= DNDI_FRAME_OUT_CURRENT) {
             status = i965_check_alloc_surface_bo(ctx, obj_surface,
-                dst_tiling, dst_fourcc, dst_sampling);
+                                                 dst_tiling, dst_fourcc, dst_sampling);
         }
         if (status != VA_STATUS_SUCCESS)
             return status;
@@ -1343,8 +1341,8 @@ pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
 
 static VAStatus
 pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
-    struct i965_post_processing_context *pp_context,
-    struct object_surface *src_surface, struct object_surface *dst_surface)
+                                struct i965_post_processing_context *pp_context,
+                                struct object_surface *src_surface, struct object_surface *dst_surface)
 {
     struct i965_driver_data * const i965 = i965_driver_data(ctx);
     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
@@ -1353,7 +1351,7 @@ pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
 
     /* Update the previous input surface */
     is_new_frame = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].surface_id !=
-        src_surface->base.id;
+                   src_surface->base.id;
     if (is_new_frame) {
         ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
         ofs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
@@ -1375,8 +1373,7 @@ pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
             if (obj_surface->base.id == ofs->surface_id) {
                 *ifs = *ofs;
                 pp_dndi_frame_store_reset(ofs);
-            }
-            else {
+            } else {
                 ifs->obj_surface = obj_surface;
                 ifs->surface_id = obj_surface->base.id;
             }
@@ -1392,13 +1389,13 @@ pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
     /* Update the Spatial Temporal Motion Measure (STMM) surfaces */
     if (is_new_frame)
         pp_dndi_frame_store_swap(&dndi_ctx->frame_store[DNDI_FRAME_IN_STMM],
-            &dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM]);
+                                 &dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM]);
 
     /* Update the output surfaces */
     ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT];
     if (dndi_ctx->is_di_adv_enabled && !dndi_ctx->is_first_frame) {
         pp_dndi_frame_store_swap(ofs,
-            &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS]);
+                                 &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS]);
         if (!dndi_ctx->is_second_field)
             ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS];
     }
@@ -1409,7 +1406,7 @@ pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
     return VA_STATUS_SUCCESS;
 }
 
-static int
+int
 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
 {
     int fourcc;
@@ -1570,7 +1567,7 @@ ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
     vfe_state->vfe1.children_present = 0;
-    vfe_state->vfe2.interface_descriptor_base = 
+    vfe_state->vfe2.interface_descriptor_base =
         pp_context->idrt.bo->offset >> 4; /* reloc */
     dri_bo_emit_reloc(bo,
                       I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -1627,7 +1624,7 @@ ironlake_pp_urb_layout(VADriverContextP ctx,
     BEGIN_BATCH(batch, 3);
     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
     OUT_BATCH(batch, 0);
-    OUT_BATCH(batch, 
+    OUT_BATCH(batch,
               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
     ADVANCE_BATCH(batch);
@@ -1664,7 +1661,7 @@ ironlake_pp_state_pointers(VADriverContextP ctx,
     ADVANCE_BATCH(batch);
 }
 
-static void 
+static void
 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
                           struct i965_post_processing_context *pp_context)
 {
@@ -1689,7 +1686,7 @@ ironlake_pp_constant_buffer(VADriverContextP ctx,
     OUT_RELOC(batch, pp_context->curbe.bo,
               I915_GEM_DOMAIN_INSTRUCTION, 0,
               pp_context->urb.size_cs_entry - 1);
-    ADVANCE_BATCH(batch);    
+    ADVANCE_BATCH(batch);
 }
 
 static void
@@ -1742,7 +1739,7 @@ ironlake_pp_pipeline_setup(VADriverContextP ctx,
 
 // update u/v offset when the surface format is packed yuv
 static void i965_update_src_surface_static_parameter(
-    VADriverContextP    ctx, 
+    VADriverContextP    ctx,
     struct i965_post_processing_context *pp_context,
     const struct i965_surface *surface)
 {
@@ -1769,11 +1766,11 @@ static void i965_update_src_surface_static_parameter(
     default:
         break;
     }
-    
+
 }
 
 static void i965_update_dst_surface_static_parameter(
-    VADriverContextP    ctx, 
+    VADriverContextP    ctx,
     struct i965_post_processing_context *pp_context,
     const struct i965_surface *surface)
 {
@@ -1800,13 +1797,13 @@ static void i965_update_dst_surface_static_parameter(
     default:
         break;
     }
-    
+
 }
 
 static void
 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                           dri_bo *surf_bo, unsigned long surf_bo_offset,
-                          int width, int height, int pitch, int format, 
+                          int width, int height, int pitch, int format,
                           int index, int is_target)
 {
     struct i965_surface_state *ss;
@@ -1881,10 +1878,10 @@ i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_con
 static void
 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                           dri_bo *surf_bo, unsigned long surf_bo_offset,
-                          int width, int height, int pitch, int format, 
+                          int width, int height, int pitch, int format,
                           int index, int is_target)
 {
-    struct i965_driver_data * const i965 = i965_driver_data(ctx);  
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
     struct gen7_surface_state *ss;
     dri_bo *ss_bo;
     unsigned int tiling;
@@ -1956,9 +1953,9 @@ gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_con
     dri_bo_unmap(ss2_bo);
 }
 
-static void 
+static void
 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
-                                const struct i965_surface *surface, 
+                                const struct i965_surface *surface,
                                 int base_index, int is_target,
                                 int *width, int *height, int *pitch, int *offset)
 {
@@ -1969,10 +1966,10 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin
     const int Y = 0;
     const int U = ((fourcc == VA_FOURCC_YV12) ||
                    (fourcc == VA_FOURCC_YV16))
-                   ? 2 : 1;
+                  ? 2 : 1;
     const int V = ((fourcc == VA_FOURCC_YV12) ||
                    (fourcc == VA_FOURCC_YV16))
-                   ? 1 : 2;
+                  ? 1 : 2;
     const int UV = 1;
     int interleaved_uv = fourcc == VA_FOURCC_NV12;
     int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
@@ -1981,7 +1978,7 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin
                               fourcc == VA_FOURCC_BGRA ||
                               fourcc == VA_FOURCC_BGRX);
     int scale_factor_of_1st_plane_width_in_byte = 1;
-                              
+
     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
         obj_surface = (struct object_surface *)surface->base;
         bo = obj_surface->bo;
@@ -1991,12 +1988,10 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin
         offset[0] = 0;
 
         if (full_packed_format) {
-            scale_factor_of_1st_plane_width_in_byte = 4; 
-        }
-        else if (packed_yuv ) {
-            scale_factor_of_1st_plane_width_in_byte =  2; 
-        }
-        else if (interleaved_uv) {
+            scale_factor_of_1st_plane_width_in_byte = 4;
+        } else if (packed_yuv) {
+            scale_factor_of_1st_plane_width_in_byte =  2;
+        } else if (interleaved_uv) {
             width[1] = obj_surface->orig_width;
             height[1] = obj_surface->orig_height / 2;
             pitch[1] = obj_surface->width;
@@ -2021,11 +2016,9 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin
 
         if (full_packed_format) {
             scale_factor_of_1st_plane_width_in_byte = 4;
-        }
-        else if (packed_yuv ) {
+        } else if (packed_yuv) {
             scale_factor_of_1st_plane_width_in_byte = 2;
-        }
-        else if (interleaved_uv) {
+        } else if (interleaved_uv) {
             width[1] = obj_image->image.width;
             height[1] = obj_image->image.height / 2;
             pitch[1] = obj_image->image.pitches[1];
@@ -2077,9 +2070,9 @@ pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processin
 
 }
 
-static void 
+static void
 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
-                                     const struct i965_surface *surface, 
+                                     const struct i965_surface *surface,
                                      int base_index, int is_target,
                                      const VARectangle *rect,
                                      int *width, int *height, int *pitch, int *offset)
@@ -2202,16 +2195,16 @@ gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_proc
             break;
         }
 
-       if (fourcc_info->format == I965_COLOR_RGB) {
-           struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
-           /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
-           format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
-           pp_static_parameter->grf2.src_avs_rgb_swap = 0;
-           if ((fourcc == VA_FOURCC_BGRA) ||
+        if (fourcc_info->format == I965_COLOR_RGB) {
+            struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+            /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
+            format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
+            pp_static_parameter->grf2.src_avs_rgb_swap = 0;
+            if ((fourcc == VA_FOURCC_BGRA) ||
                 (fourcc == VA_FOURCC_BGRX)) {
-               pp_static_parameter->grf2.src_avs_rgb_swap = 1;
-           }
-       }
+                pp_static_parameter->grf2.src_avs_rgb_swap = 1;
+            }
+        }
 
         gen7_pp_set_surface2_state(ctx, pp_context,
                                    bo, offset[0],
@@ -2313,29 +2306,25 @@ static void calculate_boundary_block_mask(struct i965_post_processing_context *p
     /* x offset of dest surface must be dword aligned.
      * so we have to extend dst surface on left edge, and mask out pixels not interested
      */
-    if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
+    if (dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT) {
         pp_context->block_horizontal_mask_left = 0;
-        for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
-        {
-            pp_context->block_horizontal_mask_left |= 1<<i;
+        for (i = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT; i < GPU_ASM_BLOCK_WIDTH; i++) {
+            pp_context->block_horizontal_mask_left |= 1 << i;
         }
-    }
-    else {
+    } else {
         pp_context->block_horizontal_mask_left = 0xffff;
     }
-    
-    int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
-    if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
-        pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
-    }
-    else {
+
+    int dst_width_adjust = dst_rect->width + dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
+    if (dst_width_adjust % GPU_ASM_BLOCK_WIDTH) {
+        pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust % GPU_ASM_BLOCK_WIDTH)) - 1;
+    } else {
         pp_context->block_horizontal_mask_right = 0xffff;
     }
-    
-    if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
-        pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
-    }
-    else {
+
+    if (dst_rect->height % GPU_ASM_BLOCK_HEIGHT) {
+        pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height % GPU_ASM_BLOCK_HEIGHT)) - 1;
+    } else {
         pp_context->block_vertical_mask_bottom = 0xff;
     }
 
@@ -2367,11 +2356,11 @@ pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processin
     pp_context->private_context = &pp_context->pp_load_save_context;
     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
 
-    int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
+    int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;;
     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
     pp_load_save_context->dest_y = dst_rect->y;
     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
-    pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
+    pp_load_save_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
 
     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
@@ -2380,8 +2369,8 @@ pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processin
     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
 
     // update u/v offset for packed yuv
-    i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
-    i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
+    i965_update_src_surface_static_parameter(ctx, pp_context, src_surface);
+    i965_update_dst_surface_static_parameter(ctx, pp_context, dst_surface);
 
     dst_surface->flags = src_surface->flags;
 
@@ -2415,7 +2404,7 @@ pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context,
     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
-    
+
     return 0;
 }
 
@@ -2500,18 +2489,18 @@ pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_con
     pp_context->private_context = &pp_context->pp_scaling_context;
     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
 
-    int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
-    float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
+    int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
+    float src_left_edge_extend = (float)dst_left_edge_extend * src_rect->width / dst_rect->width;
     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
     pp_scaling_context->dest_y = dst_rect->y;
     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
-    pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
+    pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend) / in_w;
     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
 
     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
 
-    pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
+    pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float)(src_rect->width + src_left_edge_extend) / in_w / (dst_rect->width + dst_left_edge_extend);
     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
 
@@ -2549,15 +2538,15 @@ pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int
     } else if (tmp_w >= pp_avs_context->dest_w) {
         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
         pp_inline_parameter->grf6.video_step_delta = 0;
-        
+
         if (x == 0) {
             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
-                pp_avs_context->src_normalized_x;
+                                                                                               pp_avs_context->src_normalized_x;
         } else {
             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
-                16 * 15 * video_step_delta / 2;
+                                                                                                16 * 15 * video_step_delta / 2;
         }
     } else {
         int n0, n1, n2, nls_left, nls_right;
@@ -2570,7 +2559,7 @@ pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int
         nls_left = n0 + n2;
         nls_right = n1 + n2;
         f = (float) n2 * 16 / tmp_w;
-        
+
         if (n0 < 5) {
             pp_inline_parameter->grf6.video_step_delta = 0.0;
 
@@ -2581,14 +2570,14 @@ pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int
                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
-                    16 * 15 * video_step_delta / 2;
+                                                                                                    16 * 15 * video_step_delta / 2;
             }
         } else {
             if (x < nls_left) {
                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
                 float a = f / (nls_left * 16 * factor_b);
                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
-                
+
                 pp_inline_parameter->grf6.video_step_delta = b;
 
                 if (x == 0) {
@@ -2598,7 +2587,7 @@ pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int
                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
-                        16 * 15 * video_step_delta / 2;
+                                                                                                        16 * 15 * video_step_delta / 2;
                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
                 }
             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
@@ -2606,7 +2595,7 @@ pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int
                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
-                    16 * 15 * video_step_delta / 2;
+                                                                                                    16 * 15 * video_step_delta / 2;
                 pp_inline_parameter->grf6.video_step_delta = 0.0;
                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
             } else {
@@ -2616,7 +2605,7 @@ pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int
                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
-                    16 * 15 * video_step_delta / 2;
+                                                                                                    16 * 15 * video_step_delta / 2;
                 pp_inline_parameter->grf6.video_step_delta = -b;
 
                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
@@ -2703,7 +2692,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
     float sx, sy;
 
     const int nlas = (pp_context->filter_flags & VA_FILTER_SCALING_MASK) ==
-        VA_FILTER_SCALING_NL_ANAMORPHIC;
+                     VA_FILTER_SCALING_NL_ANAMORPHIC;
 
     /* surface */
     obj_surface = (struct object_surface *)src_surface->base;
@@ -2889,7 +2878,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
     sampler_8x8[index].dw13.maximum_limiter = 11;
     sampler_8x8[index].dw14.clip_limiter = 130;
     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
-                      I915_GEM_DOMAIN_RENDER, 
+                      I915_GEM_DOMAIN_RENDER,
                       0,
                       0,
                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
@@ -2953,7 +2942,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
     sampler_8x8[index].dw13.maximum_limiter = 11;
     sampler_8x8[index].dw14.clip_limiter = 130;
     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
-                      I915_GEM_DOMAIN_RENDER, 
+                      I915_GEM_DOMAIN_RENDER,
                       0,
                       0,
                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
@@ -2967,13 +2956,13 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
     pp_context->private_context = &pp_context->pp_avs_context;
     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
 
-    int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
-    float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
+    int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
+    float src_left_edge_extend = (float)dst_left_edge_extend * src_rect->width / dst_rect->width;
     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
     pp_avs_context->dest_y = dst_rect->y;
     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
-    pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
+    pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend) / in_w;
     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
     pp_avs_context->src_h = src_rect->height;
@@ -2981,7 +2970,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
 
-    pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
+    pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float)(src_rect->width + src_left_edge_extend) / in_w / (dst_rect->width + dst_left_edge_extend);
     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
     pp_inline_parameter->grf6.video_step_delta = 0.0;
@@ -3029,21 +3018,21 @@ gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context,
     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
 
-    pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
-    pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
-    pp_inline_parameter->grf7.constant_0 = 0xffffffff;
-    pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
+    pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
+    pp_inline_parameter->grf9.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
+    pp_inline_parameter->grf9.constant_0 = 0xffffffff;
+    pp_inline_parameter->grf9.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
 
     return 0;
 }
 
-static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
+static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx,
                                               struct i965_post_processing_context *pp_context,
                                               const struct i965_surface *surface)
 {
     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
     int fourcc = pp_get_surface_fourcc(ctx, surface);
-    
+
     if (fourcc == VA_FOURCC_YUY2) {
         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
@@ -3073,6 +3062,8 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
     int src_width, src_height;
     AVSState * const avs = &pp_avs_context->state;
     float sx, sy;
+    const float * yuv_to_rgb_coefs;
+    size_t yuv_to_rgb_coefs_size;
 
     /* source surface */
     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
@@ -3160,8 +3151,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
         sampler_8x8_state->dw137.hsw.adaptive_filter_for_all_channel = 1;
         sampler_8x8_state->dw137.hsw.bypass_y_adaptive_filtering = 1;
         sampler_8x8_state->dw137.hsw.bypass_x_adaptive_filtering = 1;
-    }
-    else {
+    } else {
         sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
         sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
         sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
@@ -3197,7 +3187,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
 
     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
-                      I915_GEM_DOMAIN_RENDER, 
+                      I915_GEM_DOMAIN_RENDER,
                       0,
                       0,
                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
@@ -3224,7 +3214,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
 
     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
-                      I915_GEM_DOMAIN_RENDER, 
+                      I915_GEM_DOMAIN_RENDER,
                       0,
                       0,
                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
@@ -3251,7 +3241,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
 
     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
-                      I915_GEM_DOMAIN_RENDER, 
+                      I915_GEM_DOMAIN_RENDER,
                       0,
                       0,
                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
@@ -3265,21 +3255,22 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
     pp_context->private_context = &pp_context->pp_avs_context;
     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
 
-    pp_avs_context->dest_x = dst_rect->x;
+    int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
+    pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
     pp_avs_context->dest_y = dst_rect->y;
-    pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
+    pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
     pp_avs_context->src_w = src_rect->width;
     pp_avs_context->src_h = src_rect->height;
     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
 
     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
-    dw = MAX(dw, dst_rect->width);
+    dw = MAX(dw, dst_rect->width + dst_left_edge_extend);
 
     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
     if (IS_HASWELL(i965->intel.device_info))
-       pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
+        pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
 
     if (pp_static_parameter->grf2.avs_wa_enable) {
         int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
@@ -3290,7 +3281,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
             pp_static_parameter->grf2.avs_wa_enable = 0;
         }
     }
-       
+
     pp_static_parameter->grf2.avs_wa_width = src_width;
     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
@@ -3299,12 +3290,17 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
-        (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
+                                                                   (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
-        (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
+                                                                     (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
 
     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
 
+    yuv_to_rgb_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(src_surface->flags &
+                                                                                  VA_SRC_COLOR_MASK),
+                                                    &yuv_to_rgb_coefs_size);
+    memcpy(&pp_static_parameter->grf7, yuv_to_rgb_coefs, yuv_to_rgb_coefs_size);
+
     dst_surface->flags = src_surface->flags;
 
     return VA_STATUS_SUCCESS;
@@ -3351,9 +3347,9 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex
     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
         filter_param;
     struct object_surface * const src_obj_surface = (struct object_surface *)
-        src_surface->base;
+                                                    src_surface->base;
     struct object_surface * const dst_obj_surface = (struct object_surface *)
-        dst_surface->base;
+                                                    dst_surface->base;
     struct object_surface *obj_surface;
     struct i965_sampler_dndi *sampler_dndi;
     int index, dndi_top_first;
@@ -3361,37 +3357,37 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex
     VAStatus status;
 
     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
-        pipe_params, deint_params);
+                                                 pipe_params, deint_params);
     if (status != VA_STATUS_SUCCESS)
         return status;
 
     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
-        src_obj_surface, dst_obj_surface);
+                                             src_obj_surface, dst_obj_surface);
     if (status != VA_STATUS_SUCCESS)
         return status;
 
     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
-        src_obj_surface, dst_obj_surface);
+                                                     src_obj_surface, dst_obj_surface);
     if (status != VA_STATUS_SUCCESS)
         return status;
 
     /* Current input surface (index = 4) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
-        obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
-        0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
+                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
+                               0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
 
     /* Previous input surface (index = 5) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
-        obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
-        0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 5);
+                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
+                               0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 5);
 
     /* STMM input surface (index = 6) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
-        obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
-        I965_SURFACEFORMAT_R8_UNORM, 6, 1);
+                              obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
+                              I965_SURFACEFORMAT_R8_UNORM, 6, 1);
 
     /* Previous output surfaces (index = { 7, 8 }) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
@@ -3401,9 +3397,9 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex
     orig_h = obj_surface->orig_height;
 
     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
-        ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 7, 1);
+                              ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 7, 1);
     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
-        ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 8, 1);
+                              ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 8, 1);
 
     /* Current output surfaces (index = { 10, 11 }) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
@@ -3413,15 +3409,15 @@ pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_contex
     orig_h = obj_surface->orig_height;
 
     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
-        ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 10, 1);
+                              ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 10, 1);
     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
-        ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 11, 1);
+                              ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 11, 1);
 
     /* STMM output surface (index = 20) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
-        obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
-        I965_SURFACEFORMAT_R8_UNORM, 20, 1);
+                              obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
+                              I965_SURFACEFORMAT_R8_UNORM, 20, 1);
 
     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
 
@@ -3568,10 +3564,10 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context
 
     if (dn_filter_param) {
         float value = dn_filter_param->value;
-        
+
         if (value > 1.0)
             value = 1.0;
-        
+
         if (value < 0.0)
             value = 0.0;
 
@@ -3711,7 +3707,7 @@ pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context
     pp_dn_context->dest_h = h;
 
     dst_surface->flags = src_surface->flags;
-    
+
     return VA_STATUS_SUCCESS;
 }
 
@@ -3736,8 +3732,8 @@ gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context
 {
     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
 
-    pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
-    pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
+    pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16;
+    pp_inline_parameter->grf9.destination_block_vertical_origin = y * 4;
 
     return 0;
 }
@@ -3757,9 +3753,9 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c
     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
         filter_param;
     struct object_surface * const src_obj_surface = (struct object_surface *)
-        src_surface->base;
+                                                    src_surface->base;
     struct object_surface * const dst_obj_surface = (struct object_surface *)
-        dst_surface->base;
+                                                    dst_surface->base;
     struct object_surface *obj_surface;
     struct gen7_sampler_dndi *sampler_dndi;
     int index, dndi_top_first;
@@ -3767,37 +3763,37 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c
     VAStatus status;
 
     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
-        pipe_params, deint_params);
+                                                 pipe_params, deint_params);
     if (status != VA_STATUS_SUCCESS)
         return status;
 
     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
-        src_obj_surface, dst_obj_surface);
+                                             src_obj_surface, dst_obj_surface);
     if (status != VA_STATUS_SUCCESS)
         return status;
 
     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
-        src_obj_surface, dst_obj_surface);
+                                                     src_obj_surface, dst_obj_surface);
     if (status != VA_STATUS_SUCCESS)
         return status;
 
     /* Current input surface (index = 3) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
-        obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
-        0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 3);
+                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
+                               0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 3);
 
     /* Previous input surface (index = 4) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
-        obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
-        0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
+                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
+                               0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
 
     /* STMM input surface (index = 5) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
-        obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
-        I965_SURFACEFORMAT_R8_UNORM, 5, 1);
+                              obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
+                              I965_SURFACEFORMAT_R8_UNORM, 5, 1);
 
     /* Previous output surfaces (index = { 27, 28 }) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
@@ -3807,9 +3803,9 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c
     orig_h = obj_surface->orig_height;
 
     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
-        ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 27, 1);
+                              ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 27, 1);
     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
-        ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 28, 1);
+                              ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 28, 1);
 
     /* Current output surfaces (index = { 30, 31 }) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
@@ -3819,15 +3815,15 @@ gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_c
     orig_h = obj_surface->orig_height;
 
     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
-        ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 30, 1);
+                              ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 30, 1);
     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
-        ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 31, 1);
+                              ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 31, 1);
 
     /* STMM output surface (index = 33) */
     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
-        obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
-        I965_SURFACEFORMAT_R8_UNORM, 33, 1);
+                              obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
+                              I965_SURFACEFORMAT_R8_UNORM, 33, 1);
 
     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
 
@@ -3987,10 +3983,10 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con
 
     if (dn_filter_param) {
         float value = dn_filter_param->value;
-        
+
         if (value > 1.0)
             value = 1.0;
-        
+
         if (value < 0.0)
             value = 0.0;
 
@@ -4005,10 +4001,10 @@ gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_con
     h = obj_surface->height;
 
     if (pp_dn_context->stmm_bo == NULL) {
-        pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr,
-                                             "STMM surface",
-                                             w * h,
-                                             4096);
+        pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
+                                              "STMM surface",
+                                              w * h,
+                                              4096);
         assert(pp_dn_context->stmm_bo);
     }
 
@@ -4184,23 +4180,23 @@ ironlake_pp_initialize(
     dri_bo_unreference(pp_context->curbe.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
                       "constant buffer",
-                      4096, 
+                      4096,
                       4096);
     assert(bo);
     pp_context->curbe.bo = bo;
 
     dri_bo_unreference(pp_context->idrt.bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "interface discriptor", 
-                      sizeof(struct i965_interface_descriptor), 
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "interface discriptor",
+                      sizeof(struct i965_interface_descriptor),
                       4096);
     assert(bo);
     pp_context->idrt.bo = bo;
     pp_context->idrt.num_interface_descriptors = 0;
 
     dri_bo_unreference(pp_context->sampler_state_table.bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "sampler state table", 
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "sampler state table",
                       4096,
                       4096);
     assert(bo);
@@ -4210,7 +4206,7 @@ ironlake_pp_initialize(
     pp_context->sampler_state_table.bo = bo;
 
     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
+    bo = dri_bo_alloc(i965->intel.bufmgr,
                       "sampler 8x8 state ",
                       4096,
                       4096);
@@ -4218,7 +4214,7 @@ ironlake_pp_initialize(
     pp_context->sampler_state_table.bo_8x8 = bo;
 
     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
+    bo = dri_bo_alloc(i965->intel.bufmgr,
                       "sampler 8x8 state ",
                       4096,
                       4096);
@@ -4226,9 +4222,9 @@ ironlake_pp_initialize(
     pp_context->sampler_state_table.bo_8x8_uv = bo;
 
     dri_bo_unreference(pp_context->vfe_state.bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "vfe state", 
-                      sizeof(struct i965_vfe_state), 
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "vfe state",
+                      sizeof(struct i965_vfe_state),
                       4096);
     assert(bo);
     pp_context->vfe_state.bo = bo;
@@ -4238,11 +4234,11 @@ ironlake_pp_initialize(
 
     memset(pp_context->pp_static_parameter, 0, static_param_size);
     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
-    
+
     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
     pp_context->current_pp = pp_index;
     pp_module = &pp_context->pp_modules[pp_index];
-    
+
     if (pp_module->initialize)
         va_status = pp_module->initialize(ctx, pp_context,
                                           src_surface,
@@ -4315,23 +4311,23 @@ gen6_pp_initialize(
     dri_bo_unreference(pp_context->curbe.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
                       "constant buffer",
-                      4096, 
+                      4096,
                       4096);
     assert(bo);
     pp_context->curbe.bo = bo;
 
     dri_bo_unreference(pp_context->idrt.bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "interface discriptor", 
-                      sizeof(struct gen6_interface_descriptor_data), 
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "interface discriptor",
+                      sizeof(struct gen6_interface_descriptor_data),
                       4096);
     assert(bo);
     pp_context->idrt.bo = bo;
     pp_context->idrt.num_interface_descriptors = 0;
 
     dri_bo_unreference(pp_context->sampler_state_table.bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "sampler state table", 
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "sampler state table",
                       4096,
                       4096);
     assert(bo);
@@ -4341,7 +4337,7 @@ gen6_pp_initialize(
     pp_context->sampler_state_table.bo = bo;
 
     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
+    bo = dri_bo_alloc(i965->intel.bufmgr,
                       "sampler 8x8 state ",
                       4096,
                       4096);
@@ -4349,7 +4345,7 @@ gen6_pp_initialize(
     pp_context->sampler_state_table.bo_8x8 = bo;
 
     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
+    bo = dri_bo_alloc(i965->intel.bufmgr,
                       "sampler 8x8 state ",
                       4096,
                       4096);
@@ -4357,13 +4353,13 @@ gen6_pp_initialize(
     pp_context->sampler_state_table.bo_8x8_uv = bo;
 
     dri_bo_unreference(pp_context->vfe_state.bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "vfe state", 
-                      sizeof(struct i965_vfe_state), 
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "vfe state",
+                      sizeof(struct i965_vfe_state),
                       4096);
     assert(bo);
     pp_context->vfe_state.bo = bo;
-    
+
     if (IS_GEN7(i965->intel.device_info)) {
         static_param_size = sizeof(struct gen7_pp_static_parameter);
         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
@@ -4378,7 +4374,7 @@ gen6_pp_initialize(
     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
     pp_context->current_pp = pp_index;
     pp_module = &pp_context->pp_modules[pp_index];
-    
+
     if (pp_module->initialize)
         va_status = pp_module->initialize(ctx, pp_context,
                                           src_surface,
@@ -4409,19 +4405,19 @@ gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
     assert(bo->virtual);
     desc = bo->virtual;
     memset(desc, 0, sizeof(*desc));
-    desc->desc0.kernel_start_pointer = 
+    desc->desc0.kernel_start_pointer =
         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
     desc->desc1.single_program_flow = 1;
     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
-    desc->desc2.sampler_state_pointer = 
+    desc->desc2.sampler_state_pointer =
         pp_context->sampler_state_table.bo->offset >> 5;
     desc->desc3.binding_table_entry_count = 0;
     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
     desc->desc4.constant_urb_entry_read_offset = 0;
 
     if (IS_GEN7(i965->intel.device_info))
-        desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
+        desc->desc4.constant_urb_entry_read_length = 8; /* grf 1-8 */
     else
         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
 
@@ -4450,7 +4446,7 @@ gen6_pp_upload_constants(VADriverContextP ctx,
     int param_size;
 
     assert(sizeof(struct pp_static_parameter) == 128);
-    assert(sizeof(struct gen7_pp_static_parameter) == 192);
+    assert(sizeof(struct gen7_pp_static_parameter) == 256);
 
     if (IS_GEN7(i965->intel.device_info))
         param_size = sizeof(struct gen7_pp_static_parameter);
@@ -4517,10 +4513,10 @@ gen6_pp_vfe_state(VADriverContextP ctx,
               pp_context->vfe_gpu_state.num_urb_entries << 8);
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch,
-              (pp_context->vfe_gpu_state.urb_entry_size) << 16 |  
-               /* URB Entry Allocation Size, in 256 bits unit */
+              (pp_context->vfe_gpu_state.urb_entry_size) << 16 |
+              /* URB Entry Allocation Size, in 256 bits unit */
               (pp_context->vfe_gpu_state.curbe_allocation_size));
-               /* CURBE Allocation Size, in 256 bits unit */
+    /* CURBE Allocation Size, in 256 bits unit */
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch, 0);
@@ -4545,7 +4541,7 @@ gen6_pp_curbe_load(VADriverContextP ctx,
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch,
               param_size);
-    OUT_RELOC(batch, 
+    OUT_RELOC(batch,
               pp_context->curbe.bo,
               I915_GEM_DOMAIN_INSTRUCTION, 0,
               0);
@@ -4563,14 +4559,14 @@ gen6_interface_descriptor_load(VADriverContextP ctx,
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch,
               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
-    OUT_RELOC(batch, 
+    OUT_RELOC(batch,
               pp_context->idrt.bo,
               I915_GEM_DOMAIN_INSTRUCTION, 0,
               0);
     ADVANCE_BATCH(batch);
 }
 
-static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
+static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps)
 {
     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
 
@@ -4583,10 +4579,9 @@ static void update_block_mask_parameter(struct i965_post_processing_context *pp_
 
     /* 1 x N */
     if (x_steps == 1) {
-        if (y == y_steps-1) {
+        if (y == y_steps - 1) {
             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
-        }
-        else {
+        } else {
             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
         }
     }
@@ -4595,13 +4590,11 @@ static void update_block_mask_parameter(struct i965_post_processing_context *pp_
     if (y_steps == 1) {
         if (x == 0) { // all blocks in this group are on the left edge
             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
-            pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
-        }
-        else if (x == x_steps-1) {
+            pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left;
+        } else if (x == x_steps - 1) {
             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
-        }
-        else {
+        } else {
             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
@@ -4642,8 +4635,8 @@ gen6_pp_object_walker(VADriverContextP ctx,
             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
                 // some common block parameter update goes here, apply to all pp functions
                 if (IS_GEN6(i965->intel.device_info))
-                    update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
-                
+                    update_block_mask_parameter(pp_context, x, y, x_steps, y_steps);
+
                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
                 *command_ptr++ = 0;
                 *command_ptr++ = 0;
@@ -4673,7 +4666,7 @@ gen6_pp_object_walker(VADriverContextP ctx,
     dri_bo_unreference(command_buffer);
 
     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
-     * will cause control to pass back to ring buffer 
+     * will cause control to pass back to ring buffer
      */
     intel_batchbuffer_end_atomic(batch);
     intel_batchbuffer_flush(batch);
@@ -4710,7 +4703,7 @@ gen6_post_processing(
 )
 {
     VAStatus va_status;
-    
+
     va_status = gen6_pp_initialize(ctx, pp_context,
                                    src_surface,
                                    src_rect,
@@ -4746,9 +4739,9 @@ i965_post_processing_internal(
 
     if (pp_context && pp_context->intel_post_processing) {
         va_status = (pp_context->intel_post_processing)(ctx, pp_context,
-                          src_surface, src_rect,
-                          dst_surface, dst_rect,
-                          pp_index, filter_param);
+                                                        src_surface, src_rect,
+                                                        dst_surface, dst_rect,
+                                                        pp_index, filter_param);
     } else {
         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
     }
@@ -4766,14 +4759,14 @@ rgb_to_yuv(unsigned int argb,
     int r = ((argb >> 16) & 0xff);
     int g = ((argb >> 8) & 0xff);
     int b = ((argb >> 0) & 0xff);
-    
+
     *y = (257 * r + 504 * g + 98 * b) / 1000 + 16;
     *v = (439 * r - 368 * g - 71 * b) / 1000 + 128;
     *u = (-148 * r - 291 * g + 439 * b) / 1000 + 128;
     *a = ((argb >> 24) & 0xff);
 }
 
-static void 
+static void
 i965_vpp_clear_surface(VADriverContextP ctx,
                        struct i965_post_processing_context *pp_context,
                        struct object_surface *obj_surface,
@@ -4830,7 +4823,7 @@ i965_vpp_clear_surface(VADriverContextP ctx,
     OUT_BATCH(batch,
               region_height << 16 |
               region_width);
-    OUT_RELOC(batch, obj_surface->bo, 
+    OUT_RELOC(batch, obj_surface->bo,
               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
               0);
     OUT_BATCH(batch, y);
@@ -4854,7 +4847,7 @@ i965_vpp_clear_surface(VADriverContextP ctx,
     OUT_BATCH(batch,
               region_height << 16 |
               region_width);
-    OUT_RELOC(batch, obj_surface->bo, 
+    OUT_RELOC(batch, obj_surface->bo,
               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
               obj_surface->width * obj_surface->y_cb_offset);
     OUT_BATCH(batch, v << 8 | u);
@@ -4874,7 +4867,7 @@ i965_scaling_processing(
 {
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
+
     assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
     assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
 
@@ -4884,26 +4877,26 @@ i965_scaling_processing(
         struct i965_post_processing_context *pp_context;
         unsigned int filter_flags;
 
-         _i965LockMutex(&i965->pp_mutex);
+        _i965LockMutex(&i965->pp_mutex);
 
-         src_surface.base = (struct object_base *)src_surface_obj;
-         src_surface.type = I965_SURFACE_TYPE_SURFACE;
-         src_surface.flags = I965_SURFACE_FLAG_FRAME;
-         dst_surface.base = (struct object_base *)dst_surface_obj;
-         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
-         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
+        src_surface.base = (struct object_base *)src_surface_obj;
+        src_surface.type = I965_SURFACE_TYPE_SURFACE;
+        src_surface.flags = I965_SURFACE_FLAG_FRAME;
+        dst_surface.base = (struct object_base *)dst_surface_obj;
+        dst_surface.type = I965_SURFACE_TYPE_SURFACE;
+        dst_surface.flags = I965_SURFACE_FLAG_FRAME;
 
-         pp_context = i965->pp_context;
-         filter_flags = pp_context->filter_flags;
-         pp_context->filter_flags = va_flags;
+        pp_context = i965->pp_context;
+        filter_flags = pp_context->filter_flags;
+        pp_context->filter_flags = va_flags;
 
-         va_status = i965_post_processing_internal(ctx, pp_context,
-             &src_surface, src_rect, &dst_surface, dst_rect,
-             avs_is_needed(va_flags) ? PP_NV12_AVS : PP_NV12_SCALING, NULL);
+        va_status = i965_post_processing_internal(ctx, pp_context,
+                                                  &src_surface, src_rect, &dst_surface, dst_rect,
+                                                  avs_is_needed(va_flags) ? PP_NV12_AVS : PP_NV12_SCALING, NULL);
 
-         pp_context->filter_flags = filter_flags;
+        pp_context->filter_flags = filter_flags;
 
-         _i965UnlockMutex(&i965->pp_mutex);
+        _i965UnlockMutex(&i965->pp_mutex);
     }
 
     return va_status;
@@ -4923,7 +4916,7 @@ i965_post_processing(
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     VASurfaceID out_surface_id = VA_INVALID_ID;
     VASurfaceID tmp_id = VA_INVALID_ID;
-    
+
     *has_done_scaling = 0;
 
     if (HAS_VPP(i965)) {
@@ -4942,8 +4935,6 @@ i965_post_processing(
         pp_context->filter_flags = va_flags;
         if (avs_is_needed(va_flags)) {
             VARectangle tmp_dst_rect;
-            struct i965_render_state *render_state = &i965->render_state;
-            struct intel_region *dest_region = render_state->draw_region;
 
             if (out_surface_id != VA_INVALID_ID)
                 tmp_id = out_surface_id;
@@ -4982,7 +4973,7 @@ i965_post_processing(
 
             if (tmp_id != VA_INVALID_ID)
                 i965_DestroySurfaces(ctx, &tmp_id, 1);
-                
+
             *has_done_scaling = 1;
             calibrated_rect->x = 0;
             calibrated_rect->y = 0;
@@ -4994,7 +4985,7 @@ i965_post_processing(
     }
 
     return out_surface_id;
-}       
+}
 
 static VAStatus
 i965_image_pl2_processing(VADriverContextP ctx,
@@ -5005,7 +4996,7 @@ i965_image_pl2_processing(VADriverContextP ctx,
 
 static VAStatus
 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
-                                   VAStatus (*i965_image_plx_nv12_processing)(
+                                   VAStatus(*i965_image_plx_nv12_processing)(
                                        VADriverContextP,
                                        const struct i965_surface *,
                                        const VARectangle *,
@@ -5072,6 +5063,16 @@ i965_image_pl1_rgbx_processing(VADriverContextP ctx,
     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
     VAStatus vaStatus;
 
+    vaStatus = intel_common_scaling_post_processing(ctx,
+                                                    pp_context,
+                                                    src_surface,
+                                                    src_rect,
+                                                    dst_surface,
+                                                    dst_rect);
+
+    if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
+        return vaStatus;
+
     switch (fourcc) {
     case VA_FOURCC_NV12:
         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
@@ -5109,6 +5110,16 @@ i965_image_pl3_processing(VADriverContextP ctx,
     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
 
+    vaStatus = intel_common_scaling_post_processing(ctx,
+                                                    pp_context,
+                                                    src_surface,
+                                                    src_rect,
+                                                    dst_surface,
+                                                    dst_rect);
+
+    if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
+        return vaStatus;
+
     switch (fourcc) {
     case VA_FOURCC_NV12:
         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
@@ -5172,6 +5183,16 @@ i965_image_pl2_processing(VADriverContextP ctx,
     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
 
+    vaStatus = intel_common_scaling_post_processing(ctx,
+                                                    pp_context,
+                                                    src_surface,
+                                                    src_rect,
+                                                    dst_surface,
+                                                    dst_rect);
+
+    if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
+        return vaStatus;
+
     switch (fourcc) {
     case VA_FOURCC_NV12:
         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
@@ -5241,6 +5262,16 @@ i965_image_pl1_processing(VADriverContextP ctx,
     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
     VAStatus vaStatus;
 
+    vaStatus = intel_common_scaling_post_processing(ctx,
+                                                    pp_context,
+                                                    src_surface,
+                                                    src_rect,
+                                                    dst_surface,
+                                                    dst_rect);
+
+    if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
+        return vaStatus;
+
     switch (fourcc) {
     case VA_FOURCC_NV12:
         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
@@ -5289,6 +5320,192 @@ i965_image_pl1_processing(VADriverContextP ctx,
     return vaStatus;
 }
 
+/*
+ * Fill the caller-provided obj_surface with a surface description derived
+ * from obj_image, so that an image can be handed to code that expects a
+ * struct object_surface. Original author's note: only NV12 and P010 are
+ * supported, for use with the vebox proc ctx.
+ *
+ * obj_surface is zeroed first, so any previous contents are discarded.
+ * The buffer object is copied as a plain pointer; no dri_bo_reference()
+ * is taken here, so obj_image must outlive the derived surface.
+ * ctx is not used by this function.
+ *
+ * Returns obj_surface (the function has no failure path).
+ */
+static struct object_surface *derive_surface(VADriverContextP ctx,
+                                             struct object_image *obj_image,
+                                             struct object_surface *obj_surface)
+{
+    VAImage * const image = &obj_image->image;
+
+    memset((void *)obj_surface, 0, sizeof(*obj_surface));
+    obj_surface->fourcc = image->format.fourcc;
+    obj_surface->orig_width = image->width;
+    obj_surface->orig_height = image->height;
+    /* The surface "width" is the pitch of the first plane, not the pixel width. */
+    obj_surface->width = image->pitches[0];
+    obj_surface->height = image->height;
+    /* Row at which the second plane starts: its byte offset divided by the pitch.
+     * NOTE(review): divides by pitches[0]; assumes the image pitch is non-zero. */
+    obj_surface->y_cb_offset = image->offsets[1] / obj_surface->width;
+    /* Cr shares the Cb offset (presumably because chroma is interleaved
+     * in NV12/P010 -- TODO confirm). */
+    obj_surface->y_cr_offset = obj_surface->y_cb_offset;
+    obj_surface->bo = obj_image->bo;
+    /* Subsampling is hard-coded to 4:2:0 regardless of image->format. */
+    obj_surface->subsampling = SUBSAMPLE_YUV420;
+
+    return obj_surface;
+}
+
+/*
+ * Run a filter-less VEBOX pass from src_obj_surface to dst_obj_surface.
+ *
+ * rect is passed as both the surface (input) region and the output region
+ * of a zeroed VAProcPipelineParameterBuffer with no filters attached.
+ * The VEBOX context is created on first use and cached in
+ * pp_context->vebox_proc_ctx.
+ *
+ * Returns:
+ *   - the status of gen9_/gen10_vebox_process_picture() on GEN9/GEN10,
+ *   - VA_STATUS_ERROR_ALLOCATION_FAILED if the VEBOX context cannot be created,
+ *   - VA_STATUS_ERROR_UNIMPLEMENTED on any other device generation.
+ */
+static VAStatus
+vebox_processing_simple(VADriverContextP ctx,
+                        struct i965_post_processing_context *pp_context,
+                        struct object_surface *src_obj_surface,
+                        struct object_surface *dst_obj_surface,
+                        const VARectangle *rect)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAProcPipelineParameterBuffer pipeline_param;
+    VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
+
+    if (pp_context->vebox_proc_ctx == NULL) {
+        pp_context->vebox_proc_ctx = gen75_vebox_context_init(ctx);
+
+        /* Never dereference a context that failed to initialize. */
+        if (pp_context->vebox_proc_ctx == NULL)
+            return VA_STATUS_ERROR_ALLOCATION_FAILED;
+    }
+
+    memset((void *)&pipeline_param, 0, sizeof(pipeline_param));
+    pipeline_param.surface_region = rect;
+    pipeline_param.output_region = rect;
+    pipeline_param.filter_flags = 0;
+    pipeline_param.num_filters  = 0;
+
+    /*
+     * NOTE(review): pipeline_param lives on this function's stack, so the
+     * pointer stored in the cached context is stale once we return.
+     * Presumably every user sets it again before reading it -- confirm.
+     */
+    pp_context->vebox_proc_ctx->pipeline_param = &pipeline_param;
+    pp_context->vebox_proc_ctx->surface_input_object = src_obj_surface;
+    pp_context->vebox_proc_ctx->surface_output_object = dst_obj_surface;
+
+    if (IS_GEN9(i965->intel.device_info))
+        status = gen9_vebox_process_picture(ctx, pp_context->vebox_proc_ctx);
+    else if (IS_GEN10(i965->intel.device_info))
+        status = gen10_vebox_process_picture(ctx, pp_context->vebox_proc_ctx);
+
+    return status;
+}
+
+/*
+ * Image-processing path used by i965_image_processing() for its
+ * VA_FOURCC_P010 case.
+ *
+ * Order of attempts:
+ *   1. intel_common_scaling_post_processing(); any result other than
+ *      VA_STATUS_ERROR_UNIMPLEMENTED is returned as-is.
+ *   2. If HAS_VPP_P010() holds, a VEBOX pass (vebox_processing_simple).
+ *      - dst is P010: only identical src/dst rectangles are accepted;
+ *        otherwise VA_STATUS_ERROR_UNIMPLEMENTED is returned.
+ *      - dst is NV12 with identical rectangles: VEBOX writes straight to dst.
+ *      - dst is NV12 with differing rectangles, or any other fourcc: VEBOX
+ *        writes to a temporary NV12 surface, which is then passed on to
+ *        i965_image_pl2_processing() ("vpp_post").
+ *
+ * Temporary surfaces are destroyed at EXIT on every path. Failures while
+ * creating the temporary surface are reported through the return value
+ * instead of assert(), which is compiled out when NDEBUG is defined.
+ */
+static VAStatus
+i965_image_p010_processing(VADriverContextP ctx,
+                           const struct i965_surface *src_surface,
+                           const VARectangle *src_rect,
+                           struct i965_surface *dst_surface,
+                           const VARectangle *dst_rect)
+{
+#define HAS_VPP_P010(ctx)        ((ctx)->codec_info->has_vpp_p010 && \
+                                     (ctx)->intel.has_bsd)
+
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_post_processing_context *pp_context = i965->pp_context;
+    struct object_surface *src_obj_surface = NULL, *dst_obj_surface = NULL;
+    struct object_surface tmp_src_obj_surface, tmp_dst_obj_surface;
+    struct object_surface *tmp_surface = NULL;
+    VASurfaceID tmp_surface_id[3], out_surface_id = VA_INVALID_ID;
+    int num_tmp_surfaces = 0;
+    int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
+    VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
+    int vpp_post = 0;
+
+    vaStatus = intel_common_scaling_post_processing(ctx,
+                                                    pp_context,
+                                                    src_surface,
+                                                    src_rect,
+                                                    dst_surface,
+                                                    dst_rect);
+
+    if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
+        return vaStatus;
+
+    if (HAS_VPP_P010(i965)) {
+        /* Decide whether a second pass after VEBOX is required. */
+        switch (fourcc) {
+        case VA_FOURCC_NV12:
+            if (src_rect->x != dst_rect->x ||
+                src_rect->y != dst_rect->y ||
+                src_rect->width != dst_rect->width ||
+                src_rect->height != dst_rect->height) {
+                vpp_post = 1;
+            }
+            break;
+        case VA_FOURCC_P010:
+            // don't support scaling while the fourcc of dst_surface is P010
+            if (src_rect->x != dst_rect->x ||
+                src_rect->y != dst_rect->y ||
+                src_rect->width != dst_rect->width ||
+                src_rect->height != dst_rect->height) {
+                vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
+                goto EXIT;
+            }
+            break;
+        default:
+            vpp_post = 1;
+            break;
+        }
+
+        /* Images are wrapped in a stack-allocated surface description. */
+        if (src_surface->type == I965_SURFACE_TYPE_IMAGE) {
+            src_obj_surface = derive_surface(ctx, (struct object_image *)src_surface->base,
+                                             &tmp_src_obj_surface);
+        } else
+            src_obj_surface = (struct object_surface *)src_surface->base;
+
+        if (src_obj_surface == NULL) {
+            vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
+            goto EXIT;
+        }
+
+        if (vpp_post == 1) {
+            /* Intermediate NV12 surface that receives the VEBOX output. */
+            vaStatus = i965_CreateSurfaces(ctx,
+                                           src_obj_surface->orig_width,
+                                           src_obj_surface->orig_height,
+                                           VA_RT_FORMAT_YUV420,
+                                           1,
+                                           &out_surface_id);
+            if (vaStatus != VA_STATUS_SUCCESS)
+                goto EXIT;
+
+            /* Record the id first so EXIT destroys it on any later failure. */
+            tmp_surface_id[num_tmp_surfaces++] = out_surface_id;
+            tmp_surface = SURFACE(out_surface_id);
+            if (tmp_surface == NULL) {
+                vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
+                goto EXIT;
+            }
+            i965_check_alloc_surface_bo(ctx, tmp_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
+        }
+
+        if (tmp_surface != NULL)
+            dst_obj_surface = tmp_surface;
+        else {
+            if (dst_surface->type == I965_SURFACE_TYPE_IMAGE) {
+                dst_obj_surface = derive_surface(ctx, (struct object_image *)dst_surface->base,
+                                                 &tmp_dst_obj_surface);
+            } else
+                dst_obj_surface = (struct object_surface *)dst_surface->base;
+        }
+
+        if (dst_obj_surface == NULL) {
+            vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
+            goto EXIT;
+        }
+
+        vaStatus = vebox_processing_simple(ctx,
+                                           pp_context,
+                                           src_obj_surface,
+                                           dst_obj_surface,
+                                           src_rect);
+        if (vaStatus != VA_STATUS_SUCCESS)
+            goto EXIT;
+
+        if (vpp_post == 1) {
+            struct i965_surface src_surface_new;
+
+            /* vpp_post implies tmp_surface was created and validated above. */
+            src_surface_new.base = (struct object_base *)tmp_surface;
+            src_surface_new.type = I965_SURFACE_TYPE_SURFACE;
+            src_surface_new.flags = I965_SURFACE_FLAG_FRAME;
+
+            vaStatus = i965_image_pl2_processing(ctx,
+                                                 &src_surface_new,
+                                                 src_rect,
+                                                 dst_surface,
+                                                 dst_rect);
+        }
+    }
+
+EXIT:
+    if (num_tmp_surfaces)
+        i965_DestroySurfaces(ctx,
+                             tmp_surface_id,
+                             num_tmp_surfaces);
+
+    return vaStatus;
+}
+
 VAStatus
 i965_image_processing(VADriverContextP ctx,
                       const struct i965_surface *src_surface,
@@ -5341,25 +5558,32 @@ i965_image_processing(VADriverContextP ctx,
         case VA_FOURCC_RGBA:
         case VA_FOURCC_RGBX:
             status = i965_image_pl1_rgbx_processing(ctx,
-                                               src_surface,
-                                               src_rect,
-                                               dst_surface,
-                                               dst_rect);
+                                                    src_surface,
+                                                    src_rect,
+                                                    dst_surface,
+                                                    dst_rect);
+            break;
+        case VA_FOURCC_P010:
+            status = i965_image_p010_processing(ctx,
+                                                src_surface,
+                                                src_rect,
+                                                dst_surface,
+                                                dst_rect);
             break;
         default:
             status = VA_STATUS_ERROR_UNIMPLEMENTED;
             break;
         }
-        
+
         _i965UnlockMutex(&i965->pp_mutex);
     }
 
     return status;
-}       
+}
 
 static void
 i965_post_processing_context_finalize(VADriverContextP ctx,
-    struct i965_post_processing_context *pp_context)
+                                      struct i965_post_processing_context *pp_context)
 {
     int i;
 
@@ -5387,7 +5611,7 @@ i965_post_processing_context_finalize(VADriverContextP ctx,
 
     for (i = 0; i < ARRAY_ELEMS(pp_context->pp_dndi_context.frame_store); i++)
         pp_dndi_frame_store_clear(&pp_context->pp_dndi_context.frame_store[i],
-            ctx);
+                                  ctx);
 
     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
     pp_context->pp_dn_context.stmm_bo = NULL;
@@ -5419,7 +5643,7 @@ i965_post_processing_terminate(VADriverContextP ctx)
     i965->pp_context = NULL;
 }
 
-#define VPP_CURBE_ALLOCATION_SIZE      32
+#define VPP_CURBE_ALLOCATION_SIZE   32
 
 void
 i965_post_processing_context_init(VADriverContextP ctx,
@@ -5432,23 +5656,23 @@ i965_post_processing_context_init(VADriverContextP ctx,
     const AVSConfig *avs_config;
 
     if (IS_IRONLAKE(i965->intel.device_info)) {
-       pp_context->urb.size = i965->intel.device_info->urb_size;
-       pp_context->urb.num_vfe_entries = 32;
-       pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
-       pp_context->urb.num_cs_entries = 1;
+        pp_context->urb.size = i965->intel.device_info->urb_size;
+        pp_context->urb.num_vfe_entries = 32;
+        pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
+        pp_context->urb.num_cs_entries = 1;
         pp_context->urb.size_cs_entry = 2;
-       pp_context->urb.vfe_start = 0;
-       pp_context->urb.cs_start = pp_context->urb.vfe_start + 
-            pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
-       assert(pp_context->urb.cs_start +
-           pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
+        pp_context->urb.vfe_start = 0;
+        pp_context->urb.cs_start = pp_context->urb.vfe_start +
+                                   pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
+        assert(pp_context->urb.cs_start +
+               pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
         pp_context->intel_post_processing = ironlake_post_processing;
     } else {
-       pp_context->vfe_gpu_state.max_num_threads = 60;
-       pp_context->vfe_gpu_state.num_urb_entries = 59;
-       pp_context->vfe_gpu_state.gpgpu_mode = 0;
-       pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
-       pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
+        pp_context->vfe_gpu_state.max_num_threads = 60;
+        pp_context->vfe_gpu_state.num_urb_entries = 59;
+        pp_context->vfe_gpu_state.gpgpu_mode = 0;
+        pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
+        pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
         pp_context->intel_post_processing = gen6_post_processing;
     }
 
@@ -5496,7 +5720,7 @@ i965_post_processing_context_init(VADriverContextP ctx,
     pp_dndi_context_init(&pp_context->pp_dndi_context);
 
     avs_config = IS_IRONLAKE(i965->intel.device_info) ? &gen5_avs_config :
-        &gen6_avs_config;
+                 &gen6_avs_config;
     avs_init_state(&pp_context->pp_avs_context.state, avs_config);
 }
 
@@ -5541,7 +5765,7 @@ enum {
 
 static int
 pp_get_kernel_index(uint32_t src_fourcc, uint32_t dst_fourcc, uint32_t pp_ops,
-    uint32_t filter_flags)
+                    uint32_t filter_flags)
 {
     int pp_index = -1;
 
@@ -5580,7 +5804,7 @@ pp_get_kernel_index(uint32_t src_fourcc, uint32_t dst_fourcc, uint32_t pp_ops,
         case VA_FOURCC_NV12:
             if (pp_ops & PP_OP_CHANGE_SIZE)
                 pp_index = avs_is_needed(filter_flags) ?
-                    PP_NV12_AVS : PP_NV12_SCALING;
+                           PP_NV12_AVS : PP_NV12_SCALING;
             else
                 pp_index = PP_NV12_LOAD_SAVE_N12;
             break;
@@ -5633,7 +5857,7 @@ pp_get_kernel_index(uint32_t src_fourcc, uint32_t dst_fourcc, uint32_t pp_ops,
 
 static VAStatus
 i965_proc_picture_fast(VADriverContextP ctx,
-    struct i965_proc_context *proc_context, struct proc_state *proc_state)
+                       struct i965_proc_context *proc_context, struct proc_state *proc_state)
 {
     struct i965_driver_data * const i965 = i965_driver_data(ctx);
     const VAProcPipelineParameterBuffer * const pipeline_param =
@@ -5660,7 +5884,7 @@ i965_proc_picture_fast(VADriverContextP ctx,
             return VA_STATUS_ERROR_INVALID_PARAMETER;
 
         filter = (VAProcFilterParameterBuffer *)
-            obj_buffer->buffer_store->buffer;
+                 obj_buffer->buffer_store->buffer;
         switch (filter->type) {
         case VAProcFilterDeinterlacing:
             pp_ops |= PP_OP_DEINTERLACE;
@@ -5681,6 +5905,9 @@ i965_proc_picture_fast(VADriverContextP ctx,
     if (!src_obj_surface->fourcc)
         return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
 
+    if (!src_obj_surface->bo)
+        return VA_STATUS_ERROR_INVALID_SURFACE;
+
     if (pipeline_param->surface_region) {
         src_rect.x = pipeline_param->surface_region->x;
         src_rect.y = pipeline_param->surface_region->y;
@@ -5699,13 +5926,12 @@ i965_proc_picture_fast(VADriverContextP ctx,
 
     if (pp_ops & PP_OP_DEINTERLACE) {
         filter_flags |= !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD) ?
-            VA_TOP_FIELD : VA_BOTTOM_FIELD;
+                        VA_TOP_FIELD : VA_BOTTOM_FIELD;
         if (deint_params->algorithm != VAProcDeinterlacingBob)
             pp_ops |= PP_OP_COMPLEX;
-    }
-    else if (pipeline_param->filter_flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
+    } else if (pipeline_param->filter_flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
         filter_flags |= (pipeline_param->filter_flags & VA_TOP_FIELD) ?
-            VA_TOP_FIELD : VA_BOTTOM_FIELD;
+                        VA_TOP_FIELD : VA_BOTTOM_FIELD;
         pp_ops |= PP_OP_DEINTERLACE;
     }
     if (pp_ops & PP_OP_DEINTERLACE) // XXX: no bob-deinterlacing optimization yet
@@ -5716,6 +5942,9 @@ i965_proc_picture_fast(VADriverContextP ctx,
     if (!dst_obj_surface)
         return VA_STATUS_ERROR_INVALID_SURFACE;
 
+    if (!dst_obj_surface->bo)
+        return VA_STATUS_ERROR_INVALID_SURFACE;
+
     if (dst_obj_surface->fourcc &&
         dst_obj_surface->fourcc != src_obj_surface->fourcc)
         pp_ops |= PP_OP_CHANGE_FORMAT;
@@ -5747,8 +5976,7 @@ i965_proc_picture_fast(VADriverContextP ctx,
     if (pipeline_param->pipeline_flags & VA_PROC_PIPELINE_FAST) {
         filter_flags &= ~VA_FILTER_SCALING_MASK;
         filter_flags |= VA_FILTER_SCALING_FAST;
-    }
-    else {
+    } else {
         if (pp_ops & PP_OP_COMPLEX)
             return VA_STATUS_ERROR_UNIMPLEMENTED; // full pipeline is needed
         if ((filter_flags & VA_FILTER_SCALING_MASK) > VA_FILTER_SCALING_HQ)
@@ -5756,20 +5984,20 @@ i965_proc_picture_fast(VADriverContextP ctx,
     }
 
     pp_index = pp_get_kernel_index(src_obj_surface->fourcc,
-        dst_obj_surface->fourcc, pp_ops, filter_flags);
+                                   dst_obj_surface->fourcc, pp_ops, filter_flags);
     if (pp_index < 0)
         return VA_STATUS_ERROR_UNIMPLEMENTED;
 
     proc_context->pp_context.filter_flags = filter_flags;
     status = i965_post_processing_internal(ctx, &proc_context->pp_context,
-        &src_surface, &src_rect, &dst_surface, &dst_rect, pp_index, NULL);
+                                           &src_surface, &src_rect, &dst_surface, &dst_rect, pp_index, NULL);
     intel_batchbuffer_flush(proc_context->pp_context.batch);
     return status;
 }
 
-VAStatus 
-i965_proc_picture(VADriverContextP ctx, 
-                  VAProfile profile, 
+VAStatus
+i965_proc_picture(VADriverContextP ctx,
+                  VAProfile profile,
                   union codec_state *codec_state,
                   struct hw_context *hw_context)
 {
@@ -5787,16 +6015,40 @@ i965_proc_picture(VADriverContextP ctx,
     unsigned int tiling = 0, swizzle = 0;
     int in_width, in_height;
 
-    status = i965_proc_picture_fast(ctx, proc_context, proc_state);
-    if (status != VA_STATUS_ERROR_UNIMPLEMENTED)
-        return status;
-
     if (pipeline_param->surface == VA_INVALID_ID ||
         proc_state->current_render_target == VA_INVALID_ID) {
         status = VA_STATUS_ERROR_INVALID_SURFACE;
         goto error;
     }
 
+    obj_surface = SURFACE(proc_state->current_render_target);
+    if (!obj_surface)
+        return VA_STATUS_ERROR_INVALID_SURFACE;
+
+    if (!obj_surface->bo) {
+        unsigned int expected_format = obj_surface->expected_format;
+        int fourcc = 0;
+        int subsample = 0;
+        int tiling = HAS_TILED_SURFACE(i965);
+        switch (expected_format) {
+        case VA_RT_FORMAT_YUV420:
+            fourcc = VA_FOURCC_NV12;
+            subsample = SUBSAMPLE_YUV420;
+            break;
+        case VA_RT_FORMAT_YUV420_10BPP:
+            fourcc = VA_FOURCC_P010;
+            subsample = SUBSAMPLE_YUV420;
+            break;
+        case VA_RT_FORMAT_RGB32:
+            fourcc = VA_FOURCC_RGBA;
+            subsample = SUBSAMPLE_RGBX;
+            break;
+        default:
+            return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
+        }
+        i965_check_alloc_surface_bo(ctx, obj_surface, tiling, fourcc, subsample);
+    }
+
     obj_surface = SURFACE(pipeline_param->surface);
 
     if (!obj_surface) {
@@ -5814,6 +6066,10 @@ i965_proc_picture(VADriverContextP ctx,
         goto error;
     }
 
+    status = i965_proc_picture_fast(ctx, proc_context, proc_state);
+    if (status != VA_STATUS_ERROR_UNIMPLEMENTED)
+        return status;
+
     in_width = obj_surface->orig_width;
     in_height = obj_surface->orig_height;
     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
@@ -5838,7 +6094,8 @@ i965_proc_picture(VADriverContextP ctx,
                                      VA_RT_FORMAT_YUV420,
                                      1,
                                      &out_surface_id);
-        assert(status == VA_STATUS_SUCCESS);
+        if (status != VA_STATUS_SUCCESS)
+            goto error;
         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
         obj_surface = SURFACE(out_surface_id);
         assert(obj_surface);
@@ -5857,7 +6114,8 @@ i965_proc_picture(VADriverContextP ctx,
                                        &src_rect,
                                        &dst_surface,
                                        &dst_rect);
-        assert(status == VA_STATUS_SUCCESS);
+        if (status != VA_STATUS_SUCCESS)
+            goto error;
 
         src_surface.base = (struct object_base *)obj_surface;
         src_surface.type = I965_SURFACE_TYPE_SURFACE;
@@ -5929,7 +6187,7 @@ i965_proc_picture(VADriverContextP ctx,
 
     proc_context->pp_context.pipeline_param = NULL;
     obj_surface = SURFACE(proc_state->current_render_target);
-    
+
     if (!obj_surface) {
         status = VA_STATUS_ERROR_INVALID_SURFACE;
         goto error;
@@ -5949,7 +6207,8 @@ i965_proc_picture(VADriverContextP ctx,
 
     if (IS_GEN7(i965->intel.device_info) ||
         IS_GEN8(i965->intel.device_info) ||
-        IS_GEN9(i965->intel.device_info)) {
+        IS_GEN9(i965->intel.device_info) ||
+        IS_GEN10(i965->intel.device_info)) {
         unsigned int saved_filter_flag;
         struct i965_post_processing_context *i965pp_context = i965->pp_context;
 
@@ -5966,7 +6225,7 @@ i965_proc_picture(VADriverContextP ctx,
         intel_batchbuffer_flush(hw_context->batch);
 
         saved_filter_flag = i965pp_context->filter_flags;
-        i965pp_context->filter_flags = VA_FILTER_SCALING_HQ;
+        i965pp_context->filter_flags = (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK);
 
         dst_surface.base = (struct object_base *)obj_surface;
         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
@@ -5976,20 +6235,20 @@ i965_proc_picture(VADriverContextP ctx,
 
         if (num_tmp_surfaces)
             i965_DestroySurfaces(ctx,
-                             tmp_surfaces,
-                             num_tmp_surfaces);
+                                 tmp_surfaces,
+                                 num_tmp_surfaces);
 
         return VA_STATUS_SUCCESS;
     }
 
     int csc_needed = 0;
-    if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12){
+    if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12) {
         csc_needed = 1;
         out_surface_id = VA_INVALID_ID;
         status = i965_CreateSurfaces(ctx,
                                      obj_surface->orig_width,
                                      obj_surface->orig_height,
-                                     VA_RT_FORMAT_YUV420, 
+                                     VA_RT_FORMAT_YUV420,
                                      1,
                                      &out_surface_id);
         assert(status == VA_STATUS_SUCCESS);
@@ -6004,7 +6263,7 @@ i965_proc_picture(VADriverContextP ctx,
     }
 
     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
-    i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color); 
+    i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color);
 
     // load/save doesn't support different origin offset for src and dst surface
     if (src_rect.width == dst_rect.width &&
@@ -6038,7 +6297,7 @@ i965_proc_picture(VADriverContextP ctx,
         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
     }
-    
+
     if (num_tmp_surfaces)
         i965_DestroySurfaces(ctx,
                              tmp_surfaces,
@@ -6063,7 +6322,7 @@ i965_proc_context_destroy(void *hw_context)
     struct i965_proc_context * const proc_context = hw_context;
     VADriverContextP const ctx = proc_context->driver_context;
 
-    i965_post_processing_context_finalize(ctx, &proc_context->pp_context);
+    proc_context->pp_context.finalize(ctx, &proc_context->pp_context);
     intel_batchbuffer_free(proc_context->base.batch);
     free(proc_context);
 }