OSDN Git Service

Fix a typo
[android-x86/hardware-intel-common-vaapi.git] / src / i965_gpe_utils.c
index 4c6469b..dcb729b 100644 (file)
@@ -32,6 +32,7 @@
 #include "intel_batchbuffer.h"
 #include "intel_driver.h"
 
+#include "i965_drv_video.h"
 #include "i965_gpe_utils.h"
 
 static void
@@ -88,10 +89,10 @@ gen6_gpe_vfe_state(VADriverContextP ctx,
               gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
               gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */
     /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
-    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);                                        
-    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);                                       
-    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);                                       
-       
+    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
+    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
+    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
+
     ADVANCE_BATCH(batch);
 
 }
@@ -142,8 +143,8 @@ i965_gpe_load_kernels(VADriverContextP ctx,
     for (i = 0; i < num_kernels; i++) {
         struct i965_kernel *kernel = &gpe_context->kernels[i];
 
-        kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
-                                  kernel->name, 
+        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
+                                  kernel->name,
                                   kernel->size,
                                   0x1000);
         assert(kernel->bo);
@@ -220,6 +221,14 @@ gen6_gpe_pipeline_setup(VADriverContextP ctx,
 }
 
 static void
+gen8_gpe_pipeline_end(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context,
+                      struct intel_batchbuffer *batch)
+{
+    /* Nothing to do: no commands are emitted and all arguments are ignored. */
+}
+
+static void
 i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
 {
     switch (tiling) {
@@ -296,6 +305,44 @@ gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling
 }
 
+/*
+ * Program the tiling fields of a gen8 surface state from an I915_TILING_*
+ * value. Any value other than NONE, X or Y leaves the surface state untouched.
+ */
 static void
+gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
+{
+    unsigned int tiled, walk;
+
+    if (tiling == I915_TILING_NONE) {
+        tiled = 0;
+        walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        tiled = 1;
+        walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        tiled = 1;
+        walk = I965_TILEWALK_YMAJOR;
+    } else {
+        /* Unrecognised tiling mode: keep whatever is already in the state. */
+        return;
+    }
+
+    ss->ss0.tiled_surface = tiled;
+    ss->ss0.tile_walk = walk;
+}
+
+/*
+ * Program the tiling fields (dword ss2) of a gen8 surface state 2 from an
+ * I915_TILING_* value. Any value other than NONE, X or Y leaves the surface
+ * state untouched.
+ */
+static void
+gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
+{
+    unsigned int tiled, walk;
+
+    if (tiling == I915_TILING_NONE) {
+        tiled = 0;
+        walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        tiled = 1;
+        walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        tiled = 1;
+        walk = I965_TILEWALK_YMAJOR;
+    } else {
+        /* Unrecognised tiling mode: keep whatever is already in the state. */
+        return;
+    }
+
+    ss->ss2.tiled_surface = tiled;
+    ss->ss2.tile_walk = walk;
+}
+
+static void
 i965_gpe_set_surface2_state(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             struct i965_surface_state2 *ss)
@@ -304,7 +351,7 @@ i965_gpe_set_surface2_state(VADriverContextP ctx,
     unsigned int tiling, swizzle;
 
     assert(obj_surface->bo);
-    assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
+    assert(obj_surface->fourcc == VA_FOURCC_NV12);
 
     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
     w = obj_surface->orig_width;
@@ -387,7 +434,8 @@ i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                 struct i965_gpe_context *gpe_context,
                                 struct object_surface *obj_surface,
                                 unsigned long binding_table_offset,
-                                unsigned long surface_state_offset)
+                                unsigned long surface_state_offset,
+                                int write_enabled)
 {
     struct i965_surface_state *ss;
     dri_bo *bo;
@@ -399,7 +447,7 @@ i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
     ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
     i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
     dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 0,
+                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                       0,
                       surface_state_offset + offsetof(struct i965_surface_state, ss1),
                       obj_surface->bo);
@@ -467,7 +515,7 @@ gen7_gpe_set_surface2_state(VADriverContextP ctx,
     unsigned int tiling, swizzle;
 
     assert(obj_surface->bo);
-    assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
+    assert(obj_surface->fourcc == VA_FOURCC_NV12);
 
     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
     w = obj_surface->orig_width;
@@ -547,8 +595,8 @@ gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
 
 static void
 gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
-                                    struct object_surface *obj_surface,
-                                    struct gen7_surface_state *ss)
+                                         struct object_surface *obj_surface,
+                                         struct gen7_surface_state *ss)
 {
     int w, w_pitch;
     unsigned int tiling, swizzle;
@@ -567,7 +615,7 @@ gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
     ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
     /* ss2 */
     ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
-    ss->ss2.height = (obj_surface->height / 2) -1;
+    ss->ss2.height = (obj_surface->height / 2) - 1;
     /* ss3 */
     ss->ss3.pitch = w_pitch - 1;
     gen7_gpe_set_surface_tiling(ss, tiling);
@@ -578,7 +626,8 @@ gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                 struct i965_gpe_context *gpe_context,
                                 struct object_surface *obj_surface,
                                 unsigned long binding_table_offset,
-                                unsigned long surface_state_offset)
+                                unsigned long surface_state_offset,
+                                int write_enabled)
 {
     struct gen7_surface_state *ss;
     dri_bo *bo;
@@ -590,7 +639,7 @@ gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
     ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
     gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
     dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 0,
+                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                       0,
                       surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                       obj_surface->bo);
@@ -601,16 +650,17 @@ gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
 
 void
 gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
-                                struct i965_gpe_context *gpe_context,
-                                struct object_surface *obj_surface,
-                                unsigned long binding_table_offset,
-                                unsigned long surface_state_offset)
+                                     struct i965_gpe_context *gpe_context,
+                                     struct object_surface *obj_surface,
+                                     unsigned long binding_table_offset,
+                                     unsigned long surface_state_offset,
+                                     int write_enabled)
 {
     struct gen7_surface_state *ss;
     dri_bo *bo;
     int cbcr_offset;
 
-       assert(obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'));
+    assert(obj_surface->fourcc == VA_FOURCC_NV12);
     bo = gpe_context->surface_state_binding_table.bo;
     dri_bo_map(bo, True);
     assert(bo->virtual);
@@ -619,7 +669,7 @@ gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
     ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
     gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
     dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 0,
+                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                       cbcr_offset,
                       surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                       obj_surface->bo);
@@ -677,3 +727,2145 @@ gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
     *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
     dri_bo_unmap(bo);
 }
+
+/*
+ * Fill *ss (a gen8_surface_state2) with the description of obj_surface.
+ *
+ * Asserts that the surface has a backing BO and is NV12 or P010. The state is
+ * zeroed first, then programmed from the surface's original width/height, its
+ * pitch (obj_surface->width), its tiling mode and its Cb plane offsets.
+ */
+static void
+gen8_gpe_set_surface2_state(VADriverContextP ctx,
+                            struct object_surface *obj_surface,
+                            struct gen8_surface_state2 *ss)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int w, h, w_pitch;
+    unsigned int tiling, swizzle;
+
+    assert(obj_surface->bo);
+    assert(obj_surface->fourcc == VA_FOURCC_NV12
+           || obj_surface->fourcc == VA_FOURCC_P010);
+
+    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+    w = obj_surface->orig_width;
+    h = obj_surface->orig_height;
+    w_pitch = obj_surface->width;
+
+    memset(ss, 0, sizeof(*ss));
+    /* ss5: MOCS is only programmed on Gen9/Gen10; otherwise it stays 0 */
+    if (IS_GEN9(i965->intel.device_info) ||
+        IS_GEN10(i965->intel.device_info))
+        ss->ss5.surface_object_mocs = GEN9_CACHE_PTE;
+
+    /* ss6/ss7: low and high 32 bits of the BO's 64-bit offset */
+    ss->ss6.base_addr = (uint32_t)obj_surface->bo->offset64;
+    ss->ss7.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
+    /* ss1 */
+    ss->ss1.cbcr_pixel_offset_v_direction = 2;
+    ss->ss1.width = w - 1;
+    ss->ss1.height = h - 1;
+    /* ss2 */
+    /* NOTE(review): the 8-bit planar 4:2:0 format is used even when fourcc is P010 - confirm this is intended */
+    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
+    ss->ss2.interleave_chroma = 1;
+    ss->ss2.pitch = w_pitch - 1;
+    ss->ss2.half_pitch_for_chroma = 0;
+    gen8_gpe_set_surface2_tiling(ss, tiling);
+    /* ss3: UV offset for interleave mode */
+    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
+    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
+}
+
+/*
+ * Write a gen8_surface_state2 for obj_surface into the context's surface-state /
+ * binding-table BO and point one binding table entry at it.
+ *
+ * surface_state_offset and binding_table_offset are byte offsets into that BO's
+ * mapping. The relocation for the base address (ss6) is emitted with
+ * I915_GEM_DOMAIN_RENDER as the read domain and 0 as the write domain.
+ */
+void
+gen8_gpe_surface2_setup(VADriverContextP ctx,
+                        struct i965_gpe_context *gpe_context,
+                        struct object_surface *obj_surface,
+                        unsigned long binding_table_offset,
+                        unsigned long surface_state_offset)
+{
+    struct gen8_surface_state2 *ss;
+    dri_bo *bo;
+
+    bo = gpe_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, 1);
+    assert(bo->virtual);
+
+    ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
+    gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, 0,
+                      0,
+                      surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
+                      obj_surface->bo);
+
+    /* The binding table entry stores the offset of the surface state just written. */
+    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
+    dri_bo_unmap(bo);
+}
+
+/*
+ * Fill *ss (a gen8_surface_state) so that obj_surface's first plane can be
+ * accessed as a 2D R8_UNORM surface by media read/write messages.
+ *
+ * The state is zeroed first. Width is programmed in DWORDs (orig_width / 4),
+ * height from orig_height, and pitch from obj_surface->width.
+ */
+static void
+gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
+                                    struct object_surface *obj_surface,
+                                    struct gen8_surface_state *ss)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int w, h, w_pitch;
+    unsigned int tiling, swizzle;
+
+    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+    w = obj_surface->orig_width;
+    h = obj_surface->orig_height;
+    w_pitch = obj_surface->width;
+
+    memset(ss, 0, sizeof(*ss));
+    /* ss1: MOCS is only programmed on Gen9/Gen10; otherwise it stays 0 */
+    if (IS_GEN9(i965->intel.device_info) ||
+        IS_GEN10(i965->intel.device_info))
+        ss->ss1.surface_mocs = GEN9_CACHE_PTE;
+
+    /* ss0 */
+    ss->ss0.surface_type = I965_SURFACE_2D;
+    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
+    /* ss8/ss9: low and high 32 bits of the BO's 64-bit offset */
+    ss->ss8.base_addr = (uint32_t)obj_surface->bo->offset64;
+    ss->ss9.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
+    /* ss2 */
+    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
+    ss->ss2.height = h - 1;
+    /* ss3 */
+    ss->ss3.pitch = w_pitch - 1;
+    gen8_gpe_set_surface_tiling(ss, tiling);
+}
+
+/*
+ * Fill *ss (a gen8_surface_state) so that the chroma data of obj_surface can be
+ * accessed as a 2D R8_UNORM surface by media read/write messages.
+ *
+ * The chroma data is taken to start obj_surface->height * obj_surface->width
+ * bytes into the BO, and the programmed height is half of obj_surface->height.
+ */
+static void
+gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
+                                        struct object_surface *obj_surface,
+                                        struct gen8_surface_state *ss)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int w, w_pitch;
+    unsigned int tiling, swizzle;
+    int cbcr_offset;
+    uint64_t base_offset;
+
+    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+    w = obj_surface->orig_width;
+    w_pitch = obj_surface->width;
+
+    cbcr_offset = obj_surface->height * obj_surface->width;
+    memset(ss, 0, sizeof(*ss));
+    /* ss1: MOCS is only programmed on Gen9/Gen10; otherwise it stays 0 */
+    if (IS_GEN9(i965->intel.device_info) ||
+        IS_GEN10(i965->intel.device_info))
+        ss->ss1.surface_mocs = GEN9_CACHE_PTE;
+
+    /* ss0 */
+    ss->ss0.surface_type = I965_SURFACE_2D;
+    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
+    /* ss8/ss9: low and high 32 bits of (BO offset + chroma offset) */
+    base_offset = obj_surface->bo->offset64 + cbcr_offset;
+    ss->ss8.base_addr = (uint32_t) base_offset;
+    ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
+    /* ss2 */
+    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
+    ss->ss2.height = (obj_surface->height / 2) - 1;
+    /* ss3 */
+    ss->ss3.pitch = w_pitch - 1;
+    gen8_gpe_set_surface_tiling(ss, tiling);
+}
+
+/*
+ * Write a media read/write surface state for obj_surface into the context's
+ * surface-state / binding-table BO and point one binding table entry at it.
+ *
+ * surface_state_offset and binding_table_offset are byte offsets into that BO's
+ * mapping. When write_enabled is non-zero the relocation also names
+ * I915_GEM_DOMAIN_RENDER as the write domain; otherwise the write domain is 0.
+ */
+void
+gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
+                                struct i965_gpe_context *gpe_context,
+                                struct object_surface *obj_surface,
+                                unsigned long binding_table_offset,
+                                unsigned long surface_state_offset,
+                                int write_enabled)
+{
+    struct gen8_surface_state *ss;
+    dri_bo *bo;
+
+    bo = gpe_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, True);
+    assert(bo->virtual);
+
+    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
+    gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
+                      0,
+                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+                      obj_surface->bo);
+
+    /* The binding table entry stores the offset of the surface state just written. */
+    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
+    dri_bo_unmap(bo);
+}
+
+/*
+ * Write a chroma surface state for obj_surface into the context's
+ * surface-state / binding-table BO and point one binding table entry at it.
+ *
+ * Asserts that the surface is NV12 or P010. The relocation delta is
+ * obj_surface->height * obj_surface->width, the same chroma offset that
+ * gen8_gpe_set_media_chroma_surface_state() adds to the base address. When
+ * write_enabled is non-zero the relocation also names I915_GEM_DOMAIN_RENDER
+ * as the write domain.
+ */
+void
+gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
+                                    struct i965_gpe_context *gpe_context,
+                                    struct object_surface *obj_surface,
+                                    unsigned long binding_table_offset,
+                                    unsigned long surface_state_offset,
+                                    int write_enabled)
+{
+    struct gen8_surface_state *ss;
+    dri_bo *bo;
+    int cbcr_offset;
+
+    assert(obj_surface->fourcc == VA_FOURCC_NV12
+           || obj_surface->fourcc == VA_FOURCC_P010);
+    bo = gpe_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, True);
+    assert(bo->virtual);
+
+    cbcr_offset = obj_surface->height * obj_surface->width;
+    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
+    gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
+                      cbcr_offset,
+                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+                      obj_surface->bo);
+
+    /* The binding table entry stores the offset of the surface state just written. */
+    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
+    dri_bo_unmap(bo);
+}
+
+
+/*
+ * Fill *ss (a gen8_surface_state) to describe buffer_surface as a
+ * I965_SURFACE_BUFFER surface.
+ *
+ * The entry count is num_blocks * size_block / pitch. (count - 1) is split
+ * across three fields: bits 0-6 go to width, bits 7-20 to height and
+ * bits 21-26 to depth.
+ */
+static void
+gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
+                                  struct i965_buffer_surface *buffer_surface,
+                                  struct gen8_surface_state *ss)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int num_entries;
+
+    assert(buffer_surface->bo);
+    /* NOTE(review): divides by buffer_surface->pitch with no zero check - confirm callers always set it */
+    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
+
+    memset(ss, 0, sizeof(*ss));
+    /* ss0 */
+    ss->ss0.surface_type = I965_SURFACE_BUFFER;
+    /* ss1: MOCS is only programmed on Gen9/Gen10; otherwise it stays 0 */
+    if (IS_GEN9(i965->intel.device_info) ||
+        IS_GEN10(i965->intel.device_info))
+        ss->ss1.surface_mocs = GEN9_CACHE_PTE;
+
+    /* ss8/ss9: low and high 32 bits of the BO's 64-bit offset */
+    ss->ss8.base_addr = (uint32_t)buffer_surface->bo->offset64;
+    ss->ss9.base_addr_high = (uint32_t)(buffer_surface->bo->offset64 >> 32);
+    /* ss2 */
+    ss->ss2.width = ((num_entries - 1) & 0x7f);
+    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
+    /* ss3 */
+    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
+    ss->ss3.pitch = buffer_surface->pitch - 1;
+}
+
+/*
+ * Write a buffer surface state for buffer_surface into the context's
+ * surface-state / binding-table BO and point one binding table entry at it.
+ *
+ * surface_state_offset and binding_table_offset are byte offsets into that BO's
+ * mapping. The relocation always names I915_GEM_DOMAIN_RENDER as both the read
+ * and the write domain.
+ */
+void
+gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
+                             struct i965_gpe_context *gpe_context,
+                             struct i965_buffer_surface *buffer_surface,
+                             unsigned long binding_table_offset,
+                             unsigned long surface_state_offset)
+{
+    struct gen8_surface_state *ss;
+    dri_bo *bo;
+
+    bo = gpe_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, 1);
+    assert(bo->virtual);
+
+    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
+    gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                      0,
+                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+                      buffer_surface->bo);
+
+    /* The binding table entry stores the offset of the surface state just written. */
+    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
+    dri_bo_unmap(bo);
+}
+
+/*
+ * Emit a 16-dword CMD_STATE_BASE_ADDRESS command into batch.
+ *
+ * The surface state base always points at the context's surface-state /
+ * binding-table BO. The dynamic, indirect-object and instruction bases point
+ * at the matching context BO when one is set; otherwise address 0 is written.
+ * All four upper bounds are programmed as 0xFFFFF000.
+ *
+ * NOTE(review): the dword count of 16 relies on each OUT_RELOC64 emitting two
+ * dwords, matching the two OUT_BATCH calls in every else branch - confirm
+ * against the macro definition.
+ */
+static void
+gen8_gpe_state_base_address(VADriverContextP ctx,
+                            struct i965_gpe_context *gpe_context,
+                            struct intel_batchbuffer *batch)
+{
+    BEGIN_BATCH(batch, 16);
+
+    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 14);
+
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);              //General State Base Address
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+
+    /*DW4 Surface state base address */
+    OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+
+    /*DW6. Dynamic state base address */
+    if (gpe_context->dynamic_state.bo)
+        OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
+                    I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
+                    0, BASE_ADDRESS_MODIFY);
+    else {
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+        OUT_BATCH(batch, 0);
+    }
+
+
+    /*DW8. Indirect Object base address */
+    if (gpe_context->indirect_state.bo)
+        OUT_RELOC64(batch, gpe_context->indirect_state.bo,
+                    I915_GEM_DOMAIN_SAMPLER,
+                    0, BASE_ADDRESS_MODIFY);
+    else {
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+        OUT_BATCH(batch, 0);
+    }
+
+
+    /*DW10. Instruct base address */
+    if (gpe_context->instruction_state.bo)
+        OUT_RELOC64(batch, gpe_context->instruction_state.bo,
+                    I915_GEM_DOMAIN_INSTRUCTION,
+                    0, BASE_ADDRESS_MODIFY);
+    else {
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+        OUT_BATCH(batch, 0);
+    }
+
+    /* DW12. Size limitation */
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);     //General State Access Upper Bound
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);     //Dynamic State Access Upper Bound
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);     //Indirect Object Access Upper Bound
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);     //Instruction Access Upper Bound
+
+    /*
+      OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                //LLC Coherent Base Address
+      OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY );      //LLC Coherent Upper Bound
+    */
+
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit a 9-dword CMD_MEDIA_VFE_STATE command into batch.
+ *
+ * The scratch space dwords are written as 0. Thread count, URB entry count,
+ * GPGPU mode, URB entry size and CURBE allocation size come from
+ * gpe_context->vfe_state; the last three dwords are vfe_desc5/6/7 verbatim.
+ */
+static void
+gen8_gpe_vfe_state(VADriverContextP ctx,
+                   struct i965_gpe_context *gpe_context,
+                   struct intel_batchbuffer *batch)
+{
+
+    BEGIN_BATCH(batch, 9);
+
+    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
+    /* Scratch Space Base Pointer and Space */
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+
+    OUT_BATCH(batch,
+              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
+              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
+              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
+    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
+    OUT_BATCH(batch,
+              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
+              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */
+
+    /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
+    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
+    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
+    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
+
+    ADVANCE_BATCH(batch);
+
+}
+
+
+/*
+ * Emit a 4-dword CMD_MEDIA_CURBE_LOAD command into batch.
+ *
+ * The length is gpe_context->curbe.length rounded up with ALIGN(..., 64) and
+ * the start is gpe_context->curbe.offset.
+ */
+static void
+gen8_gpe_curbe_load(VADriverContextP ctx,
+                    struct i965_gpe_context *gpe_context,
+                    struct intel_batchbuffer *batch)
+{
+    BEGIN_BATCH(batch, 4);
+
+    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, ALIGN(gpe_context->curbe.length, 64));
+    OUT_BATCH(batch, gpe_context->curbe.offset);
+
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit 6 dwords into batch: a 2-dword CMD_MEDIA_STATE_FLUSH followed by a
+ * 4-dword CMD_MEDIA_INTERFACE_LOAD that describes the interface descriptor
+ * table at gpe_context->idrt.offset.
+ */
+static void
+gen8_gpe_idrt(VADriverContextP ctx,
+              struct i965_gpe_context *gpe_context,
+              struct intel_batchbuffer *batch)
+{
+    BEGIN_BATCH(batch, 6);
+
+    OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
+    OUT_BATCH(batch, 0);
+
+    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
+    OUT_BATCH(batch, 0);
+    /*
+     * NOTE(review): the length uses the unaligned entry_size, while
+     * gen8_gpe_context_init() spaces entries by ALIGN(entry_size, 64) -
+     * confirm entry_size is always a multiple of 64.
+     */
+    OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
+    OUT_BATCH(batch, gpe_context->idrt.offset);
+
+    ADVANCE_BATCH(batch);
+}
+
+
+/*
+ * Emit the gen8 media pipeline setup sequence into batch, in this order:
+ * MI flush, pipeline select (i965_gpe_select), state base address, VFE state,
+ * CURBE load, and interface descriptor load.
+ */
+void
+gen8_gpe_pipeline_setup(VADriverContextP ctx,
+                        struct i965_gpe_context *gpe_context,
+                        struct intel_batchbuffer *batch)
+{
+    intel_batchbuffer_emit_mi_flush(batch);
+
+    i965_gpe_select(ctx, gpe_context, batch);
+    gen8_gpe_state_base_address(ctx, gpe_context, batch);
+    gen8_gpe_vfe_state(ctx, gpe_context, batch);
+    gen8_gpe_curbe_load(ctx, gpe_context, batch);
+    gen8_gpe_idrt(ctx, gpe_context, batch);
+}
+
+/*
+ * (Re)allocate the buffer objects of a gen8 GPE context.
+ *
+ * Two BOs are allocated, each after dropping the reference previously held in
+ * the same slot:
+ *   - the surface state / binding table BO, sized by
+ *     surface_state_binding_table.length;
+ *   - the dynamic state BO, which is shared by the CURBE, the interface
+ *     descriptor table and the sampler state. Each of those three takes its
+ *     own reference on the BO and gets a 64-byte aligned offset, laid out in
+ *     that order.
+ *
+ * dynamic_state.end_offset is left at the end of the sampler area.
+ * Allocation failures are only caught by assert().
+ */
+void
+gen8_gpe_context_init(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    dri_bo *bo;
+    int bo_size;
+    unsigned int start_offset, end_offset;
+
+    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      gpe_context->surface_state_binding_table.length,
+                      4096);
+    assert(bo);
+    gpe_context->surface_state_binding_table.bo = bo;
+
+    /* Room for every area at its 64-byte aligned size. */
+    bo_size = gpe_context->idrt.max_entries * ALIGN(gpe_context->idrt.entry_size, 64) +
+              ALIGN(gpe_context->curbe.length, 64) +
+              gpe_context->sampler.max_entries * ALIGN(gpe_context->sampler.entry_size, 64);
+    dri_bo_unreference(gpe_context->dynamic_state.bo);
+    /* Use a distinct debug name so this BO is not confused with the one above. */
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "dynamic state",
+                      bo_size,
+                      4096);
+    assert(bo);
+    gpe_context->dynamic_state.bo = bo;
+    gpe_context->dynamic_state.bo_size = bo_size;
+
+    end_offset = 0;
+    gpe_context->dynamic_state.end_offset = 0;
+
+    /* Constant buffer offset */
+    start_offset = ALIGN(end_offset, 64);
+    dri_bo_unreference(gpe_context->curbe.bo);
+    gpe_context->curbe.bo = bo;
+    dri_bo_reference(gpe_context->curbe.bo);
+    gpe_context->curbe.offset = start_offset;
+    end_offset = start_offset + gpe_context->curbe.length;
+
+    /* Interface descriptor offset */
+    start_offset = ALIGN(end_offset, 64);
+    dri_bo_unreference(gpe_context->idrt.bo);
+    gpe_context->idrt.bo = bo;
+    dri_bo_reference(gpe_context->idrt.bo);
+    gpe_context->idrt.offset = start_offset;
+    end_offset = start_offset + ALIGN(gpe_context->idrt.entry_size, 64) * gpe_context->idrt.max_entries;
+
+    /* Sampler state offset */
+    start_offset = ALIGN(end_offset, 64);
+    dri_bo_unreference(gpe_context->sampler.bo);
+    gpe_context->sampler.bo = bo;
+    dri_bo_reference(gpe_context->sampler.bo);
+    gpe_context->sampler.offset = start_offset;
+    end_offset = start_offset + ALIGN(gpe_context->sampler.entry_size, 64) * gpe_context->sampler.max_entries;
+
+    /* update the end offset of dynamic_state */
+    gpe_context->dynamic_state.end_offset = end_offset;
+}
+
+
+/*
+ * Drop the context's reference on each of its seven buffer objects (surface
+ * state / binding table, instruction state, dynamic state, indirect state,
+ * CURBE, interface descriptor table, sampler) and reset each pointer to NULL.
+ */
+void
+gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
+{
+    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
+    gpe_context->surface_state_binding_table.bo = NULL;
+
+    dri_bo_unreference(gpe_context->instruction_state.bo);
+    gpe_context->instruction_state.bo = NULL;
+
+    dri_bo_unreference(gpe_context->dynamic_state.bo);
+    gpe_context->dynamic_state.bo = NULL;
+
+    dri_bo_unreference(gpe_context->indirect_state.bo);
+    gpe_context->indirect_state.bo = NULL;
+
+    dri_bo_unreference(gpe_context->curbe.bo);
+    gpe_context->curbe.bo = NULL;
+
+    dri_bo_unreference(gpe_context->idrt.bo);
+    gpe_context->idrt.bo = NULL;
+
+    dri_bo_unreference(gpe_context->sampler.bo);
+    gpe_context->sampler.bo = NULL;
+}
+
+
+/*
+ * Copy num_kernels kernel descriptors from kernel_list into the context and
+ * upload their binaries into a single instruction-state BO.
+ *
+ * Each kernel is placed at a 64-byte aligned offset, which is recorded in its
+ * kernel_offset field. Kernels with size 0 get an offset but copy nothing.
+ * instruction_state.end_offset ends up just past the last copied kernel.
+ *
+ * Any instruction-state BO left from an earlier load is released first, so
+ * calling this again on the same context does not leak it. If the new BO
+ * cannot be allocated, a warning is logged and the function returns with
+ * instruction_state.bo set to NULL.
+ */
+void
+gen8_gpe_load_kernels(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context,
+                      struct i965_kernel *kernel_list,
+                      unsigned int num_kernels)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    unsigned int i;
+    int kernel_size = 0;
+    unsigned int kernel_offset, end_offset;
+    unsigned char *kernel_ptr;
+    struct i965_kernel *kernel;
+
+    assert(num_kernels <= MAX_GPE_KERNELS);
+    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
+    gpe_context->num_kernels = num_kernels;
+
+    /* Total size of all kernels, each rounded up to 64 bytes. */
+    for (i = 0; i < num_kernels; i++) {
+        kernel = &gpe_context->kernels[i];
+
+        kernel_size += ALIGN(kernel->size, 64);
+    }
+
+    /* Release the BO of a previous load, as gen8_gpe_context_init() does for its BOs. */
+    dri_bo_unreference(gpe_context->instruction_state.bo);
+    gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                     "kernel shader",
+                                                     kernel_size,
+                                                     0x1000);
+    if (gpe_context->instruction_state.bo == NULL) {
+        WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
+        return;
+    }
+
+    gpe_context->instruction_state.bo_size = kernel_size;
+    gpe_context->instruction_state.end_offset = 0;
+    end_offset = 0;
+
+    dri_bo_map(gpe_context->instruction_state.bo, 1);
+    kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
+    /* Catch a failed mapping before copying, like the surface setup helpers do. */
+    assert(kernel_ptr);
+    for (i = 0; i < num_kernels; i++) {
+        kernel_offset = ALIGN(end_offset, 64);
+        kernel = &gpe_context->kernels[i];
+        kernel->kernel_offset = kernel_offset;
+
+        if (kernel->size) {
+            memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
+
+            end_offset = kernel_offset + kernel->size;
+        }
+    }
+
+    gpe_context->instruction_state.end_offset = end_offset;
+
+    dri_bo_unmap(gpe_context->instruction_state.bo);
+
+    return;
+}
+
+/*
+ * Emit a 19-dword CMD_STATE_BASE_ADDRESS command into batch.
+ *
+ * Same layout as the gen8 variant, with two differences visible here:
+ * every relocated base also ORs (i965->intel.mocs_state << 4) into its low
+ * dword, and three extra dwords for the bindless surface state follow the
+ * upper bounds. The dynamic state relocation names I915_GEM_DOMAIN_RENDER as
+ * its write domain.
+ *
+ * NOTE(review): the dword count of 19 relies on each OUT_RELOC64 emitting two
+ * dwords, matching the two OUT_BATCH calls in every else branch - confirm
+ * against the macro definition.
+ */
+static void
+gen9_gpe_state_base_address(VADriverContextP ctx,
+                            struct i965_gpe_context *gpe_context,
+                            struct intel_batchbuffer *batch)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    BEGIN_BATCH(batch, 19);
+
+    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));
+
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);              //General State Base Address
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+
+    /*DW4 Surface state base address */
+    OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4)); /* Surface state base address */
+
+    /*DW6. Dynamic state base address */
+    if (gpe_context->dynamic_state.bo)
+        OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
+                    I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
+                    I915_GEM_DOMAIN_RENDER,
+                    BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4));
+    else {
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+        OUT_BATCH(batch, 0);
+    }
+
+
+    /*DW8. Indirect Object base address */
+    if (gpe_context->indirect_state.bo)
+        OUT_RELOC64(batch, gpe_context->indirect_state.bo,
+                    I915_GEM_DOMAIN_SAMPLER,
+                    0, BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4));
+    else {
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+        OUT_BATCH(batch, 0);
+    }
+
+
+    /*DW10. Instruct base address */
+    if (gpe_context->instruction_state.bo)
+        OUT_RELOC64(batch, gpe_context->instruction_state.bo,
+                    I915_GEM_DOMAIN_INSTRUCTION,
+                    0, BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4));
+    else {
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+        OUT_BATCH(batch, 0);
+    }
+
+
+    /* DW12. Size limitation */
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);     //General State Access Upper Bound
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);     //Dynamic State Access Upper Bound
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);     //Indirect Object Access Upper Bound
+    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);     //Instruction Access Upper Bound
+
+    /* the bindless surface state address */
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0xFFFFF000);
+
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit a single-dword CMD_PIPELINE_SELECT that selects the media pipeline
+ * with the GEN9_MEDIA_DOP_GATE_OFF and GEN9_FORCE_MEDIA_AWAKE_ON flags, each
+ * accompanied by its *_MASK flag. gen9_gpe_pipeline_end() emits the
+ * counterpart with GATE_ON / AWAKE_OFF.
+ */
+static void
+gen9_gpe_select(VADriverContextP ctx,
+                struct i965_gpe_context *gpe_context,
+                struct intel_batchbuffer *batch)
+{
+    BEGIN_BATCH(batch, 1);
+    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
+              GEN9_PIPELINE_SELECTION_MASK |
+              GEN9_MEDIA_DOP_GATE_OFF |
+              GEN9_MEDIA_DOP_GATE_MASK |
+              GEN9_FORCE_MEDIA_AWAKE_ON |
+              GEN9_FORCE_MEDIA_AWAKE_MASK);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the gen9 media pipeline setup sequence into batch, in this order:
+ * MI flush, gen9 pipeline select, gen9 state base address, then the gen8 VFE
+ * state, CURBE load and interface descriptor load, which are reused unchanged.
+ */
+void
+gen9_gpe_pipeline_setup(VADriverContextP ctx,
+                        struct i965_gpe_context *gpe_context,
+                        struct intel_batchbuffer *batch)
+{
+    intel_batchbuffer_emit_mi_flush(batch);
+
+    gen9_gpe_select(ctx, gpe_context, batch);
+    gen9_gpe_state_base_address(ctx, gpe_context, batch);
+    gen8_gpe_vfe_state(ctx, gpe_context, batch);
+    gen8_gpe_curbe_load(ctx, gpe_context, batch);
+    gen8_gpe_idrt(ctx, gpe_context, batch);
+}
+
+/*
+ * Emit the closing one-dword PIPELINE_SELECT: same media selection as
+ * gen9_gpe_select(), but with the "DOP gate on" and "force media awake off"
+ * bits (plus their *_MASK companions).
+ */
+void
+gen9_gpe_pipeline_end(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context,
+                      struct intel_batchbuffer *batch)
+{
+    const unsigned int pipeline_select =
+        CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
+        GEN9_PIPELINE_SELECTION_MASK |
+        GEN9_MEDIA_DOP_GATE_ON | GEN9_MEDIA_DOP_GATE_MASK |
+        GEN9_FORCE_MEDIA_AWAKE_OFF | GEN9_FORCE_MEDIA_AWAKE_MASK;
+
+    BEGIN_BATCH(batch, 1);
+    OUT_BATCH(batch, pipeline_select);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Allocate a buffer object of `size` bytes (alignment argument 4096) and
+ * record it in `res`.
+ *
+ * Returns true when the buffer object was allocated; false when `res` is
+ * NULL, `size` is not positive, or dri_bo_alloc() returned NULL.
+ * res->map is reset to NULL; the buffer is not mapped here.
+ */
+bool
+i965_allocate_gpe_resource(dri_bufmgr *bufmgr,
+                           struct i965_gpe_resource *res,
+                           int size,
+                           const char *name)
+{
+    /* A negative size would be handed to the allocator as a huge value. */
+    if (!res || size <= 0)
+        return false;
+
+    res->size = size;
+    res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
+    res->map = NULL;
+
+    return res->bo != NULL;
+}
+
+/*
+ * Describe `obj_surface` as a 2D GPE resource. Width and height are the
+ * surface's original dimensions rounded up with ALIGN() to (1 << alignment);
+ * pitch, size and the chroma layout fields are copied from the surface.
+ * The surface's buffer object is shared and gains one reference.
+ */
+void
+i965_object_surface_to_2d_gpe_resource_with_align(struct i965_gpe_resource *res,
+                                                  struct object_surface *obj_surface,
+                                                  unsigned int alignment)
+{
+    const int align_unit = 1 << alignment;
+    unsigned int unused_swizzle;
+
+    res->type = I965_GPE_RESOURCE_2D;
+    res->bo = obj_surface->bo;
+    res->map = NULL;
+
+    /* Geometry */
+    res->width = ALIGN(obj_surface->orig_width, align_unit);
+    res->height = ALIGN(obj_surface->orig_height, align_unit);
+    res->pitch = obj_surface->width;
+    res->size = obj_surface->size;
+
+    /* Chroma plane layout */
+    res->cb_cr_pitch = obj_surface->cb_cr_pitch;
+    res->x_cb_offset = obj_surface->x_cb_offset;
+    res->y_cb_offset = obj_surface->y_cb_offset;
+
+    dri_bo_reference(res->bo);
+    dri_bo_get_tiling(obj_surface->bo, &res->tiling, &unused_swizzle);
+}
+
+void
+i965_object_surface_to_2d_gpe_resource(struct i965_gpe_resource *res,
+                                       struct object_surface *obj_surface)
+{   /* Convenience wrapper: alignment exponent 0, i.e. ALIGN() to 1, which leaves the dimensions as they are. */
+    i965_object_surface_to_2d_gpe_resource_with_align(res, obj_surface, 0);
+}
+
+/*
+ * Describe an existing buffer object as a one-row buffer resource:
+ * width == pitch == bo->size, height == 1.
+ * `bo` is shared with `res` and gains one reference.
+ */
+void
+i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
+                                       dri_bo *bo)
+{
+    unsigned int swizzle;
+
+    res->type = I965_GPE_RESOURCE_BUFFER;
+    res->width = bo->size;
+    res->height = 1;
+    res->pitch = res->width;
+    /*
+     * Bytes covered = pitch * height. Multiplying by width instead would
+     * give bo->size squared, and i965_zero_gpe_resource() would then
+     * memset far past the end of the mapping.
+     */
+    res->size = res->pitch * res->height;
+    res->bo = bo;
+    res->map = NULL;
+
+    dri_bo_reference(res->bo);
+    dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
+}
+
+/*
+ * Describe an existing buffer object as a 2D resource with the given
+ * width, height and pitch. `bo` is shared with `res` and gains one
+ * reference.
+ */
+void
+i965_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
+                                   dri_bo *bo,
+                                   unsigned int width,
+                                   unsigned int height,
+                                   unsigned int pitch)
+{
+    unsigned int swizzle;
+
+    res->type = I965_GPE_RESOURCE_2D;
+    res->width = width;
+    res->height = height;
+    res->pitch = pitch;
+    /* A 2D surface spans `height` rows of `pitch` bytes (not pitch * width). */
+    res->size = res->pitch * res->height;
+    res->bo = bo;
+    res->map = NULL;
+
+    dri_bo_reference(res->bo);
+    dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
+}
+
+/*
+ * Fill the resource's buffer object with zeros (res->size bytes).
+ * Does nothing when the resource has no buffer object or when mapping
+ * it fails.
+ */
+void
+i965_zero_gpe_resource(struct i965_gpe_resource *res)
+{
+    if (!res->bo)
+        return;
+
+    /* Same success test as i965_map_gpe_resource(): 0 means mapped. */
+    if (dri_bo_map(res->bo, 1) != 0)
+        return;
+
+    memset(res->bo->virtual, 0, res->size);
+    dri_bo_unmap(res->bo);
+}
+
+/*
+ * Drop this resource's reference on its buffer object and clear the
+ * bo/map fields.
+ */
+void
+i965_free_gpe_resource(struct i965_gpe_resource *res)
+{
+    dri_bo *released = res->bo;
+
+    res->map = NULL;
+    res->bo = NULL;
+    dri_bo_unreference(released);
+}
+
+/*
+ * Map the resource's buffer object (second dri_bo_map argument is 1) and
+ * cache the mapping in res->map.
+ *
+ * Returns the mapped address, or NULL when there is no buffer object or
+ * dri_bo_map() reports failure.
+ */
+void *
+i965_map_gpe_resource(struct i965_gpe_resource *res)
+{
+    res->map = NULL;
+
+    if (res->bo && dri_bo_map(res->bo, 1) == 0)
+        res->map = res->bo->virtual;
+
+    return res->map;
+}
+
+/*
+ * Undo i965_map_gpe_resource(): unmap only when both a buffer object and a
+ * cached mapping are present, then always clear res->map.
+ */
+void
+i965_unmap_gpe_resource(struct i965_gpe_resource *res)
+{
+    const int is_mapped = (res->bo != NULL) && (res->map != NULL);
+
+    if (is_mapped) {
+        dri_bo_unmap(res->bo);
+    }
+
+    res->map = NULL;
+}
+
+/*
+ * Emit a 5-dword MI_FLUSH_DW.
+ *
+ * When params->bo is set, the post-sync operation is a QWORD write and the
+ * address dwords are a 64-bit relocation to params->bo + params->offset;
+ * otherwise no write is requested and the address dwords are zero.
+ * params->dw0 / params->dw1 are always emitted as the last two dwords.
+ */
+void
+gen8_gpe_mi_flush_dw(VADriverContextP ctx,
+                     struct intel_batchbuffer *batch,
+                     struct gpe_mi_flush_dw_parameter *params)
+{
+    unsigned int header = MI_FLUSH_DW2 | (5 - 2); /* Always use PPGTT */
+
+    if (params->video_pipeline_cache_invalidate)
+        header |= MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE;
+
+    header |= params->bo ? MI_FLUSH_DW_WRITE_QWORD : MI_FLUSH_DW_NOWRITE;
+
+    __OUT_BATCH(batch, header);
+
+    if (!params->bo) {
+        __OUT_BATCH(batch, 0);
+        __OUT_BATCH(batch, 0);
+    } else {
+        __OUT_RELOC64(batch,
+                      params->bo,
+                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                      params->offset);
+    }
+
+    __OUT_BATCH(batch, params->dw0);
+    __OUT_BATCH(batch, params->dw1);
+}
+
+/*
+ * Emit MI_STORE_DATA_IMM targeting params->bo + params->offset.
+ * With params->is_qword the command sets bit 21, uses length (5 - 2) and
+ * carries dw0 and dw1; otherwise it uses length (4 - 2) and carries dw0 only.
+ */
+void
+gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
+                           struct intel_batchbuffer *batch,
+                           struct gpe_mi_store_data_imm_parameter *params)
+{
+    unsigned int header = MI_STORE_DATA_IMM; /* Always use PPGTT */
+
+    if (params->is_qword)
+        header |= (1 << 21) | (5 - 2);
+    else
+        header |= (4 - 2);
+
+    __OUT_BATCH(batch, header);
+    __OUT_RELOC64(batch,
+                  params->bo,
+                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                  params->offset);
+    __OUT_BATCH(batch, params->dw0);
+
+    if (params->is_qword) {
+        __OUT_BATCH(batch, params->dw1);
+    }
+}
+
+/*
+ * Emit MI_STORE_REGISTER_MEM: header, the MMIO register offset, then a
+ * 64-bit relocation to params->bo + params->offset.
+ */
+void
+gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
+                               struct intel_batchbuffer *batch,
+                               struct gpe_mi_store_register_mem_parameter *params)
+{
+    const unsigned int header = MI_STORE_REGISTER_MEM | (4 - 2); /* Always use PPGTT */
+
+    __OUT_BATCH(batch, header);
+    __OUT_BATCH(batch, params->mmio_offset);
+    __OUT_RELOC64(batch,
+                  params->bo,
+                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                  params->offset);
+}
+
+/*
+ * Emit MI_LOAD_REGISTER_MEM: header, the MMIO register offset, then a
+ * 64-bit relocation to params->bo + params->offset.
+ */
+void
+gen8_gpe_mi_load_register_mem(VADriverContextP ctx,
+                              struct intel_batchbuffer *batch,
+                              struct gpe_mi_load_register_mem_parameter *params)
+{
+    const unsigned int header = MI_LOAD_REGISTER_MEM | (4 - 2); /* Always use PPGTT */
+
+    __OUT_BATCH(batch, header);
+    __OUT_BATCH(batch, params->mmio_offset);
+    __OUT_RELOC64(batch,
+                  params->bo,
+                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                  params->offset);
+}
+
+/*
+ * Emit MI_LOAD_REGISTER_IMM: header, the MMIO register offset, then the
+ * immediate value params->data.
+ */
+void
+gen8_gpe_mi_load_register_imm(VADriverContextP ctx,
+                              struct intel_batchbuffer *batch,
+                              struct gpe_mi_load_register_imm_parameter *params)
+{
+    const unsigned int header = MI_LOAD_REGISTER_IMM | (3 - 2);
+
+    __OUT_BATCH(batch, header);
+    __OUT_BATCH(batch, params->mmio_offset);
+    __OUT_BATCH(batch, params->data);
+}
+
+/*
+ * Emit MI_LOAD_REGISTER_REG: header, the source MMIO offset, then the
+ * destination MMIO offset.
+ */
+void
+gen8_gpe_mi_load_register_reg(VADriverContextP ctx,
+                              struct intel_batchbuffer *batch,
+                              struct gpe_mi_load_register_reg_parameter *params)
+{
+    const unsigned int header = MI_LOAD_REGISTER_REG | (3 - 2);
+
+    __OUT_BATCH(batch, header);
+    __OUT_BATCH(batch, params->src_mmio_offset);
+    __OUT_BATCH(batch, params->dst_mmio_offset);
+}
+
+/*
+ * Emit MI_MATH with length field (num_instructions - 1), followed by the
+ * instruction list copied verbatim (4 bytes per instruction).
+ */
+void
+gen9_gpe_mi_math(VADriverContextP ctx,
+                 struct intel_batchbuffer *batch,
+                 struct gpe_mi_math_parameter *params)
+{
+    const unsigned int header = MI_MATH | (params->num_instructions - 1);
+
+    __OUT_BATCH(batch, header);
+    intel_batchbuffer_data(batch,
+                           params->instruction_list,
+                           params->num_instructions * 4);
+}
+
+/*
+ * Emit MI_CONDITIONAL_BATCH_BUFFER_END: header (bit 21 set, compare-mask
+ * mode bit unless params->compare_mask_mode_disabled), the compare data,
+ * then a 64-bit relocation to params->bo + params->offset.
+ */
+void
+gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
+                                         struct intel_batchbuffer *batch,
+                                         struct gpe_mi_conditional_batch_buffer_end_parameter *params)
+{
+    unsigned int header = MI_CONDITIONAL_BATCH_BUFFER_END |
+                          (1 << 21) |
+                          (4 - 2); /* Always use PPGTT */
+
+    if (!params->compare_mask_mode_disabled)
+        header |= MI_COMPARE_MASK_MODE_ENANBLED;
+
+    __OUT_BATCH(batch, header);
+    __OUT_BATCH(batch, params->compare_data);
+    __OUT_RELOC64(batch,
+                  params->bo,
+                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  params->offset);
+}
+
+/*
+ * Emit MI_BATCH_BUFFER_START followed by a 64-bit relocation to
+ * params->bo + params->offset. Bit 22 is set for a second-level batch and
+ * bit 8 is set unless params->use_global_gtt is requested; bit 0 is always
+ * set.
+ */
+void
+gen8_gpe_mi_batch_buffer_start(VADriverContextP ctx,
+                               struct intel_batchbuffer *batch,
+                               struct gpe_mi_batch_buffer_start_parameter *params)
+{
+    unsigned int header = MI_BATCH_BUFFER_START | (1 << 0);
+
+    if (params->is_second_level)
+        header |= (1 << 22);
+
+    if (!params->use_global_gtt)
+        header |= (1 << 8);
+
+    __OUT_BATCH(batch, header);
+    __OUT_RELOC64(batch,
+                  params->bo,
+                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  params->offset);
+}
+
+/*
+ * Point the context's dynamic-state, curbe, idrt and sampler slots at the
+ * single buffer object ds->bo. Each slot drops its previous buffer object,
+ * takes its own reference on ds->bo, and records its offset (or, for the
+ * dynamic state itself, the buffer size) from `ds`.
+ *
+ * Does nothing when ds->bo or gpe_context is NULL.
+ */
+void
+gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
+                                    struct i965_gpe_context *gpe_context,
+                                    struct gpe_dynamic_state_parameter *ds)
+{
+    if (!ds->bo || !gpe_context)
+        return;
+
+    /* dynamic state: the whole buffer */
+    dri_bo_unreference(gpe_context->dynamic_state.bo);
+    gpe_context->dynamic_state.bo = ds->bo;
+    dri_bo_reference(gpe_context->dynamic_state.bo);
+    gpe_context->dynamic_state.bo_size = ds->bo_size;
+
+    /* curbe lives inside the dynamic buffer */
+    dri_bo_unreference(gpe_context->curbe.bo);
+    gpe_context->curbe.bo = ds->bo;
+    dri_bo_reference(gpe_context->curbe.bo);
+    gpe_context->curbe.offset = ds->curbe_offset;
+
+    /* interface descriptors live inside the dynamic buffer */
+    dri_bo_unreference(gpe_context->idrt.bo);
+    gpe_context->idrt.bo = ds->bo;
+    dri_bo_reference(gpe_context->idrt.bo);
+    gpe_context->idrt.offset = ds->idrt_offset;
+
+    /* sampler state lives inside the dynamic buffer */
+    dri_bo_unreference(gpe_context->sampler.bo);
+    gpe_context->sampler.bo = ds->bo;
+    dri_bo_reference(gpe_context->sampler.bo);
+    gpe_context->sampler.offset = ds->sampler_offset;
+}
+
+/*
+ * Map the curbe buffer object and return the address of the curbe area,
+ * i.e. the mapping base plus curbe.offset. The result of dri_bo_map() is
+ * not checked. Pair with i965_gpe_context_unmap_curbe().
+ */
+void *
+i965_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
+{
+    dri_bo *curbe_bo = gpe_context->curbe.bo;
+    char *mapped_base;
+
+    dri_bo_map(curbe_bo, 1);
+    mapped_base = (char *)curbe_bo->virtual;
+
+    return mapped_base + gpe_context->curbe.offset;
+}
+
+/* Unmap the curbe buffer object mapped by i965_gpe_context_map_curbe(). */
+void
+i965_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
+{
+    dri_bo *curbe_bo = gpe_context->curbe.bo;
+
+    dri_bo_unmap(curbe_bo);
+}
+
+/*
+ * Rewrite every binding-table entry so that entry i holds the offset of
+ * surface-state slot i (surface_state_offset + i * padded GEN9 slot size).
+ */
+void
+gen9_gpe_reset_binding_table(VADriverContextP ctx,
+                             struct i965_gpe_context *gpe_context)
+{
+    dri_bo *table_bo = gpe_context->surface_state_binding_table.bo;
+    unsigned int *entries;
+    int slot;
+
+    dri_bo_map(table_bo, 1);
+    entries = (unsigned int *)((char *)table_bo->virtual +
+                               gpe_context->surface_state_binding_table.binding_table_offset);
+
+    for (slot = 0; slot < gpe_context->surface_state_binding_table.max_entries; slot++)
+        entries[slot] = gpe_context->surface_state_binding_table.surface_state_offset +
+                        slot * SURFACE_STATE_PADDED_SIZE_GEN9;
+
+    dri_bo_unmap(table_bo);
+}
+
+/*
+ * Write one interface descriptor per kernel into the idrt area of the
+ * idrt buffer object (starting at idrt.offset). Each descriptor is zeroed
+ * and then given the kernel start pointer, sampler state pointer, binding
+ * table pointer and the curbe read length (in 32-byte units).
+ */
+void
+gen8_gpe_setup_interface_data(VADriverContextP ctx,
+                              struct i965_gpe_context *gpe_context)
+{
+    dri_bo *idrt_bo = gpe_context->idrt.bo;
+    struct gen8_interface_descriptor_data *table;
+    int k;
+
+    dri_bo_map(idrt_bo, 1);
+    assert(idrt_bo->virtual);
+    table = (struct gen8_interface_descriptor_data *)
+            ((unsigned char *)idrt_bo->virtual + gpe_context->idrt.offset);
+
+    for (k = 0; k < gpe_context->num_kernels; k++) {
+        struct gen8_interface_descriptor_data *entry = &table[k];
+        struct i965_kernel *kernel = &gpe_context->kernels[k];
+
+        assert(sizeof(*entry) == 32);
+
+        /* Set up the descriptor table entry */
+        memset(entry, 0, sizeof(*entry));
+
+        entry->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
+
+        entry->desc3.sampler_count = 0;
+        entry->desc3.sampler_state_pointer = (gpe_context->sampler.offset >> 5);
+
+        entry->desc4.binding_table_entry_count = 0;
+        entry->desc4.binding_table_pointer =
+            (gpe_context->surface_state_binding_table.binding_table_offset >> 5);
+
+        entry->desc5.constant_urb_entry_read_offset = 0;
+        entry->desc5.constant_urb_entry_read_length =
+            ALIGN(gpe_context->curbe.length, 32) >> 5; // in registers
+    }
+
+    dri_bo_unmap(idrt_bo);
+}
+
+/*
+ * Translate an I915_TILING_* value into the ss0 tiled_surface / tile_walk
+ * fields. Any other tiling value leaves the fields untouched.
+ */
+static void
+gen9_gpe_set_surface_tiling(struct gen9_surface_state *ss, unsigned int tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        ss->ss0.tiled_surface = 0;
+        ss->ss0.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+/*
+ * Translate an I915_TILING_* value into the ss2 tiled_surface / tile_walk
+ * fields of a gen9_surface_state2. Any other tiling value leaves the fields
+ * untouched.
+ */
+static void
+gen9_gpe_set_surface2_tiling(struct gen9_surface_state2 *ss, unsigned int tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        ss->ss2.tiled_surface = 0;
+        ss->ss2.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+/*
+ * Fill a zeroed gen9_surface_state for a 2D surface: type/format,
+ * alignment mode, MOCS, dimensions and pitch (each stored minus one),
+ * y offset, channel selects, the 64-bit base address split across
+ * ss8/ss9, and finally the tiling fields.
+ */
+static void
+gen9_gpe_set_2d_surface_state(struct gen9_surface_state *ss,
+                              unsigned int cacheability_control,
+                              unsigned int format,
+                              unsigned int tiling,
+                              unsigned int width,
+                              unsigned int height,
+                              unsigned int pitch,
+                              uint64_t base_offset,
+                              unsigned int y_offset)
+{
+    const uint32_t addr_low = (uint32_t)base_offset;
+    const uint32_t addr_high = (uint32_t)(base_offset >> 32);
+
+    memset(ss, 0, sizeof(*ss));
+
+    ss->ss0.surface_type = I965_SURFACE_2D;
+    ss->ss0.surface_format = format;
+    /* Always set 1(align 4 mode) */
+    ss->ss0.horizontal_alignment = 1;
+    ss->ss0.vertical_alignment = 1;
+
+    ss->ss1.surface_mocs = cacheability_control;
+
+    ss->ss2.height = height - 1;
+    ss->ss2.width = width - 1;
+
+    ss->ss3.pitch = pitch - 1;
+
+    ss->ss5.y_offset = y_offset;
+
+    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+
+    ss->ss8.base_addr = addr_low;
+    ss->ss9.base_addr_high = addr_high;
+
+    gen9_gpe_set_surface_tiling(ss, tiling);
+}
+
+/*
+ * This is only for NV12 format.
+ *
+ * Fill a zeroed gen9_surface_state2: chroma vertical direction, dimensions
+ * and pitch (each stored minus one), format with interleaved chroma, the
+ * Cb y offset, MOCS, the 64-bit base address split across ss6/ss7, and
+ * finally the tiling fields.
+ */
+static void
+gen9_gpe_set_adv_surface_state(struct gen9_surface_state2 *ss,
+                               unsigned int v_direction,
+                               unsigned int cacheability_control,
+                               unsigned int format,
+                               unsigned int tiling,
+                               unsigned int width,
+                               unsigned int height,
+                               unsigned int pitch,
+                               uint64_t base_offset,
+                               unsigned int y_cb_offset)
+{
+    const uint32_t addr_low = (uint32_t)base_offset;
+    const uint32_t addr_high = (uint32_t)(base_offset >> 32);
+
+    memset(ss, 0, sizeof(*ss));
+
+    ss->ss1.width = width - 1;
+    ss->ss1.height = height - 1;
+    ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
+
+    ss->ss2.pitch = pitch - 1;
+    ss->ss2.interleave_chroma = 1;
+    ss->ss2.surface_format = format;
+
+    ss->ss3.y_offset_for_cb = y_cb_offset;
+
+    ss->ss5.surface_object_mocs = cacheability_control;
+
+    ss->ss6.base_addr = addr_low;
+    ss->ss7.base_addr_high = addr_high;
+
+    gen9_gpe_set_surface2_tiling(ss, tiling);
+}
+
+/*
+ * Fill a zeroed gen9_surface_state for a buffer surface. The value
+ * (size - 1) is split across three fields: bits 0-6 go to ss2.width,
+ * bits 7-20 to ss2.height and bits 21-27 to ss3.depth. The 64-bit base
+ * address is split across ss8/ss9.
+ */
+static void
+gen9_gpe_set_buffer2_surface_state(struct gen9_surface_state *ss,
+                                   unsigned int cacheability_control,
+                                   unsigned int format,
+                                   unsigned int size,
+                                   unsigned int pitch,
+                                   uint64_t base_offset)
+{
+    const unsigned int last_index = size - 1;
+
+    memset(ss, 0, sizeof(*ss));
+
+    ss->ss0.surface_type = I965_SURFACE_BUFFER;
+    ss->ss0.surface_format = format;
+
+    ss->ss1.surface_mocs = cacheability_control;
+
+    ss->ss2.width = last_index & 0x7F;
+    ss->ss2.height = (last_index & 0x1FFF80) >> 7;
+
+    ss->ss3.depth = (last_index & 0xFE00000) >> 21;
+    ss->ss3.pitch = pitch - 1;
+
+    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+
+    ss->ss8.base_addr = (uint32_t)base_offset;
+    ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
+}
+
+void
+gen9_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+                             struct i965_gpe_surface *gpe_surface,
+                             int index)
+{   /*
+     * Writes binding-table entry `index` and the matching padded GEN9
+     * surface-state slot inside surface_state_binding_table.bo for
+     * `gpe_surface`, then emits a relocation for the surface base address.
+     * The branches are tried in this order: 2D with override offset,
+     * 2D UV plane, plain 2D, advanced surface (gen9_surface_state2), and
+     * finally buffer. `index` is not range-checked here, and the result of
+     * dri_bo_map() is not checked.
+     */
+    char *buf;
+    unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
+    unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
+                                        index * SURFACE_STATE_PADDED_SIZE_GEN9;
+    unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
+                                        index * 4;
+    struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
+
+    dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle); /* swizzle is fetched but not used below */
+
+    dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
+    buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
+    *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset; /* entry holds the slot's offset */
+
+    if (gpe_surface->is_2d_surface && gpe_surface->is_override_offset) { /* 2D surface at caller-supplied byte offset */
+        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
+
+        width = gpe_resource->width;
+        height = gpe_resource->height;
+        pitch = gpe_resource->pitch;
+
+        if (gpe_surface->is_media_block_rw) { /* width becomes a count of 4-unit groups, doubled first for 16bpp */
+            if (gpe_surface->is_16bpp)
+                width = (ALIGN(width * 2, 4) >> 2);
+            else
+                width = (ALIGN(width, 4) >> 2);
+        }
+
+
+        gen9_gpe_set_2d_surface_state(ss,
+                                      gpe_surface->cacheability_control,
+                                      gpe_surface->format,
+                                      tiling,
+                                      width, height, pitch,
+                                      gpe_resource->bo->offset64 + gpe_surface->offset,
+                                      0);
+
+        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                          gpe_surface->offset, /* same delta as added to the base address above */
+                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
+                          gpe_resource->bo);
+    } else if (gpe_surface->is_2d_surface && gpe_surface->is_uv_surface) { /* chroma plane exposed as its own 2D surface */
+        unsigned int cbcr_offset;
+        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
+
+        width = gpe_resource->width;
+        height = gpe_resource->height / 2; /* this branch uses half the resource height */
+        pitch = gpe_resource->pitch;
+
+        if (gpe_surface->is_media_block_rw) { /* same width conversion as the other 2D branches */
+            if (gpe_surface->is_16bpp)
+                width = (ALIGN(width * 2, 4) >> 2);
+            else
+                width = (ALIGN(width, 4) >> 2);
+        }
+
+        if (tiling == I915_TILING_Y) { /* presumably the tile height in rows per tiling mode; TODO confirm */
+            tile_alignment = 32;
+        } else if (tiling == I915_TILING_X) {
+            tile_alignment = 8;
+        } else
+            tile_alignment = 1;
+
+        y_offset = (gpe_resource->y_cb_offset % tile_alignment); /* rows left over after aligning down */
+        cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch; /* presumably the byte offset of the aligned-down row; ALIGN_FLOOR is defined elsewhere */
+
+        gen9_gpe_set_2d_surface_state(ss,
+                                      gpe_surface->cacheability_control,
+                                      I965_SURFACEFORMAT_R16_UINT, /* format is fixed here; gpe_surface->format is not used */
+                                      tiling,
+                                      width, height, pitch,
+                                      gpe_resource->bo->offset64 + cbcr_offset,
+                                      y_offset);
+
+        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                          cbcr_offset, /* same delta as added to the base address above */
+                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
+                          gpe_resource->bo);
+    } else if (gpe_surface->is_2d_surface) { /* plain 2D surface starting at the beginning of the bo */
+        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
+
+        width = gpe_resource->width;
+        height = gpe_resource->height;
+        pitch = gpe_resource->pitch;
+
+        if (gpe_surface->is_media_block_rw) { /* same width conversion as the other 2D branches */
+            if (gpe_surface->is_16bpp)
+                width = (ALIGN(width * 2, 4) >> 2);
+            else
+                width = (ALIGN(width, 4) >> 2);
+        }
+
+        gen9_gpe_set_2d_surface_state(ss,
+                                      gpe_surface->cacheability_control,
+                                      gpe_surface->format,
+                                      tiling,
+                                      width, height, pitch,
+                                      gpe_resource->bo->offset64,
+                                      y_offset); /* still 0 from its initializer in this branch */
+
+        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                          0,
+                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
+                          gpe_resource->bo);
+    } else if (gpe_surface->is_adv_surface) { /* uses the gen9_surface_state2 layout; address lives in ss6 */
+        struct gen9_surface_state2 *ss = (struct gen9_surface_state2 *)(buf + surface_state_offset);
+
+        width = gpe_resource->width;
+        height = gpe_resource->height;
+        pitch = gpe_resource->pitch;
+
+        gen9_gpe_set_adv_surface_state(ss,
+                                       gpe_surface->v_direction,
+                                       gpe_surface->cacheability_control,
+                                       MFX_SURFACE_PLANAR_420_8,
+                                       tiling,
+                                       width, height, pitch,
+                                       gpe_resource->bo->offset64,
+                                       gpe_resource->y_cb_offset);
+
+        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                          0,
+                          surface_state_offset + offsetof(struct gen9_surface_state2, ss6),
+                          gpe_resource->bo);
+    } else { /* everything else must be a buffer surface */
+        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
+        unsigned int format;
+
+        assert(gpe_surface->is_buffer);
+
+        if (gpe_surface->is_raw_buffer) { /* raw buffers use pitch 1; others use a 4-byte (R32_UINT) pitch */
+            format = I965_SURFACEFORMAT_RAW;
+            pitch = 1;
+        } else {
+            format = I965_SURFACEFORMAT_R32_UINT;
+            pitch = sizeof(unsigned int);
+        }
+
+        gen9_gpe_set_buffer2_surface_state(ss,
+                                           gpe_surface->cacheability_control,
+                                           format,
+                                           gpe_surface->size,
+                                           pitch,
+                                           gpe_resource->bo->offset64 + gpe_surface->offset);
+
+        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                          gpe_surface->offset, /* same delta as added to the base address above */
+                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
+                          gpe_resource->bo);
+    }
+
+    dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
+
+/*
+ * Allocate a buffer object for a 2D resource of the given width, height
+ * and pitch. The buffer holds ALIGN(height, 16) rows of `pitch` bytes and
+ * is requested with alignment argument 4096.
+ *
+ * Returns true when the buffer object was allocated; false when `res` is
+ * NULL or dri_bo_alloc() returned NULL (matching
+ * i965_allocate_gpe_resource()). res->map is reset to NULL.
+ */
+bool
+i965_gpe_allocate_2d_resource(dri_bufmgr *bufmgr,
+                              struct i965_gpe_resource *res,
+                              int width,
+                              int height,
+                              int pitch,
+                              const char *name)
+{
+    int bo_size;
+
+    if (!res)
+        return false;
+
+    res->type = I965_GPE_RESOURCE_2D;
+    res->width = width;
+    res->height = height;
+    res->pitch = pitch;
+
+    bo_size = ALIGN(height, 16) * pitch;
+    res->size = bo_size;
+
+    res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
+    res->map = NULL;
+
+    /* Report allocation failure instead of claiming success with a NULL bo. */
+    return res->bo != NULL;
+}
+
+/* Emit a two-dword MEDIA_STATE_FLUSH whose second dword is zero. */
+void
+gen8_gpe_media_state_flush(VADriverContextP ctx,
+                           struct i965_gpe_context *gpe_context,
+                           struct intel_batchbuffer *batch)
+{
+    const unsigned int header = CMD_MEDIA_STATE_FLUSH | (2 - 2);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, header);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit MEDIA_OBJECT: six fixed dwords (header, interface offset,
+ * scoreboard enable at bit 21, zero, scoreboard x/y, scoreboard mask),
+ * followed by the inline data rounded up to a multiple of 4 bytes when
+ * both param->pinline_data and param->inline_size are set.
+ */
+void
+gen8_gpe_media_object(VADriverContextP ctx,
+                      struct i965_gpe_context *gpe_context,
+                      struct intel_batchbuffer *batch,
+                      struct gpe_media_object_parameter *param)
+{
+    int inline_bytes = 0;
+    int total_dwords = 6;
+
+    if (param->pinline_data && param->inline_size) {
+        inline_bytes = ALIGN(param->inline_size, 4);
+        total_dwords += inline_bytes / 4;
+    }
+
+    BEGIN_BATCH(batch, total_dwords);
+
+    OUT_BATCH(batch, CMD_MEDIA_OBJECT | (total_dwords - 2));
+    OUT_BATCH(batch, param->interface_offset);
+    OUT_BATCH(batch, param->use_scoreboard << 21);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, (param->scoreboard_y << 16 |
+                      param->scoreboard_x));
+    OUT_BATCH(batch, param->scoreboard_mask);
+
+    if (inline_bytes) {
+        intel_batchbuffer_data(batch, param->pinline_data, inline_bytes);
+    }
+
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit MEDIA_OBJECT_WALKER: 17 fixed dwords built from `param`, followed
+ * by the inline data rounded up to a multiple of 4 bytes.
+ *
+ * Inline data is counted in the command length and emitted only when both
+ * param->pinline_data and param->inline_size are set, the same rule
+ * gen8_gpe_media_object() uses. Counting it on inline_size alone would
+ * reserve dwords that are never written when pinline_data is NULL.
+ */
+void
+gen8_gpe_media_object_walker(VADriverContextP ctx,
+                             struct i965_gpe_context *gpe_context,
+                             struct intel_batchbuffer *batch,
+                             struct gpe_media_object_walker_parameter *param)
+{
+    int walker_length = 17;
+    int inline_bytes = 0;
+
+    if (param->pinline_data && param->inline_size) {
+        inline_bytes = ALIGN(param->inline_size, 4);
+        walker_length += inline_bytes / 4;
+    }
+
+    BEGIN_BATCH(batch, walker_length);
+    OUT_BATCH(batch, CMD_MEDIA_OBJECT_WALKER | (walker_length - 2));
+    OUT_BATCH(batch, param->interface_offset);
+    OUT_BATCH(batch, param->use_scoreboard << 21);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, (param->group_id_loop_select << 8 |
+                      param->scoreboard_mask)); // DW5
+    OUT_BATCH(batch, (param->color_count_minus1 << 24 |
+                      param->middle_loop_extra_steps << 16 |
+                      param->mid_loop_unit_y << 12 |
+                      param->mid_loop_unit_x << 8));
+    /* Only the low 10 bits of each loop execution count are emitted. */
+    OUT_BATCH(batch, ((param->global_loop_exec_count & 0x3ff) << 16 |
+                      (param->local_loop_exec_count & 0x3ff)));
+    OUT_BATCH(batch, param->block_resolution.value);
+    OUT_BATCH(batch, param->local_start.value);
+    OUT_BATCH(batch, 0); // DW10
+    OUT_BATCH(batch, param->local_outer_loop_stride.value);
+    OUT_BATCH(batch, param->local_inner_loop_unit.value);
+    OUT_BATCH(batch, param->global_resolution.value);
+    OUT_BATCH(batch, param->global_start.value);
+    OUT_BATCH(batch, param->global_outer_loop_stride.value);
+    OUT_BATCH(batch, param->global_inner_loop_unit.value);
+
+    if (inline_bytes)
+        intel_batchbuffer_data(batch, param->pinline_data, inline_bytes);
+
+    ADVANCE_BATCH(batch);
+}
+
+
+/*
+ * Build a walker parameter block from the simplified kernel walker
+ * description. `walker_param` is zeroed first. Block and global
+ * resolution both take the kernel resolution, and the loop execution
+ * counts are set to 0xFFFF.
+ *
+ * With no_dependency set, the scoreboard is disabled and the local loop
+ * walks in raster order; otherwise scoreboard mask 0x0F and the
+ * "26 degree" local loop pattern are used.
+ */
+void
+intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
+                                             struct gpe_media_object_walker_parameter *walker_param)
+{
+    memset(walker_param, 0, sizeof(*walker_param));
+
+    walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
+
+    /* Resolutions */
+    walker_param->block_resolution.x = kernel_walker_param->resolution_x;
+    walker_param->block_resolution.y = kernel_walker_param->resolution_y;
+    walker_param->global_resolution.x = kernel_walker_param->resolution_x;
+    walker_param->global_resolution.y = kernel_walker_param->resolution_y;
+
+    /* Global loop */
+    walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
+    walker_param->global_outer_loop_stride.y = 0;
+    walker_param->global_inner_loop_unit.x = 0;
+    walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
+
+    walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
+    walker_param->global_loop_exec_count = 0xFFFF;  //MAX VALUE
+
+    if (!kernel_walker_param->no_dependency) {
+        walker_param->local_end.x = 0;
+        walker_param->local_end.y = 0;
+
+        // 26 degree
+        walker_param->scoreboard_mask = 0x0F;
+        walker_param->local_outer_loop_stride.x = 1;
+        walker_param->local_outer_loop_stride.y = 0;
+        walker_param->local_inner_loop_unit.x = -2;
+        walker_param->local_inner_loop_unit.y = 1;
+        return;
+    }
+
+    /* The no_dependency is used for VPP */
+    walker_param->scoreboard_mask = 0;
+    walker_param->use_scoreboard = 0;
+
+    // Raster scan walking pattern
+    walker_param->local_outer_loop_stride.x = 0;
+    walker_param->local_outer_loop_stride.y = 1;
+    walker_param->local_inner_loop_unit.x = 1;
+    walker_param->local_inner_loop_unit.y = 0;
+    walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
+    walker_param->local_end.y = 0;
+}
+
+/*
+ * Rewrite every binding-table entry so that entry i holds the offset of
+ * surface-state slot i (surface_state_offset + i * padded GEN8 slot size).
+ */
+void
+gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context)
+{
+    dri_bo *table_bo = gpe_context->surface_state_binding_table.bo;
+    unsigned int *entries;
+    int slot;
+
+    dri_bo_map(table_bo, 1);
+    entries = (unsigned int *)((char *)table_bo->virtual +
+                               gpe_context->surface_state_binding_table.binding_table_offset);
+
+    for (slot = 0; slot < gpe_context->surface_state_binding_table.max_entries; slot++)
+        entries[slot] = gpe_context->surface_state_binding_table.surface_state_offset +
+                        slot * SURFACE_STATE_PADDED_SIZE_GEN8;
+
+    dri_bo_unmap(table_bo);
+}
+
+/*
+ * Fill a zeroed gen8_surface_state for a 2D surface: type/format,
+ * vertical line stride settings, MOCS, dimensions and pitch (each stored
+ * minus one), y offset, channel selects, the 32-bit base address in ss8,
+ * and finally the tiling fields.
+ */
+static void
+gen8_gpe_set_2d_surface_state(struct gen8_surface_state *ss,
+                              unsigned int vert_line_stride_offset,
+                              unsigned int vert_line_stride,
+                              unsigned int cacheability_control,
+                              unsigned int format,
+                              unsigned int tiling,
+                              unsigned int width,
+                              unsigned int height,
+                              unsigned int pitch,
+                              unsigned int base_offset,
+                              unsigned int y_offset)
+{
+    memset(ss, 0, sizeof(*ss));
+
+    ss->ss0.surface_type = I965_SURFACE_2D;
+    ss->ss0.surface_format = format;
+    ss->ss0.vert_line_stride = vert_line_stride;
+    ss->ss0.vert_line_stride_ofs = vert_line_stride_offset;
+
+    ss->ss1.surface_mocs = cacheability_control;
+
+    ss->ss2.height = height - 1;
+    ss->ss2.width = width - 1;
+
+    ss->ss3.pitch = pitch - 1;
+
+    ss->ss5.y_offset = y_offset;
+
+    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+
+    ss->ss8.base_addr = base_offset;
+
+    gen8_gpe_set_surface_tiling(ss, tiling);
+}
+
+/*
+ * Fill *ss (a gen8_surface_state2) for an "advanced" surface.
+ *
+ * The structure is zeroed first.  width, height and pitch are stored
+ * minus one, interleave_chroma is always set to 1, and the tiling
+ * fields are filled in last by gen8_gpe_set_surface2_tiling().
+ */
+static void
+gen8_gpe_set_adv_surface_state(struct gen8_surface_state2 *ss,
+                               unsigned int v_direction,
+                               unsigned int cacheability_control,
+                               unsigned int format,
+                               unsigned int tiling,
+                               unsigned int width,
+                               unsigned int height,
+                               unsigned int pitch,
+                               unsigned int base_offset,
+                               unsigned int y_cb_offset)
+{
+    memset(ss, 0, sizeof(*ss));
+
+    /* Dimensions (minus one) and chroma siting */
+    ss->ss1.height = height - 1;
+    ss->ss1.width = width - 1;
+    ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
+
+    /* Layout */
+    ss->ss2.pitch = pitch - 1;
+    ss->ss2.interleave_chroma = 1;
+    ss->ss2.surface_format = format;
+
+    /* Row offset of the Cb plane */
+    ss->ss3.y_offset_for_cb = y_cb_offset;
+
+    ss->ss5.surface_object_mocs = cacheability_control;
+
+    ss->ss6.base_addr = base_offset;
+
+    gen8_gpe_set_surface2_tiling(ss, tiling);
+}
+
+/*
+ * Fill *ss with the description of a buffer surface.
+ *
+ * The structure is zeroed first.  The value (size - 1) is split across
+ * three fields:
+ *     bits  0..6   -> ss2.width
+ *     bits  7..20  -> ss2.height
+ *     bits 21..27  -> ss3.depth
+ * pitch is stored minus one.
+ */
+static void
+gen8_gpe_set_buffer2_surface_state(struct gen8_surface_state *ss,
+                                   unsigned int cacheability_control,
+                                   unsigned int format,
+                                   unsigned int size,
+                                   unsigned int pitch,
+                                   unsigned int base_offset)
+{
+    const unsigned int last_index = size - 1;
+
+    memset(ss, 0, sizeof(*ss));
+
+    ss->ss0.surface_type = I965_SURFACE_BUFFER;
+    ss->ss0.surface_format = format;
+
+    ss->ss1.surface_mocs = cacheability_control;
+
+    /* Same bits as masking with 0x7F / 0x1FFF80 / 0xFE00000 and shifting down */
+    ss->ss2.width = last_index & 0x7F;
+    ss->ss2.height = (last_index >> 7) & 0x3FFF;
+    ss->ss3.depth = (last_index >> 21) & 0x7F;
+
+    ss->ss3.pitch = pitch - 1;
+
+    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+
+    ss->ss8.base_addr = base_offset;
+}
+
+/*
+ * Describe gpe_surface in slot `index` of the context's
+ * surface-state/binding-table buffer object.
+ *
+ * Steps:
+ *   1. binding table entry `index` is pointed at surface state `index`;
+ *   2. the surface state is filled in according to the surface flags,
+ *      tested in this order: is_2d_surface, is_adv_surface, otherwise a
+ *      buffer (asserted via is_buffer);
+ *   3. a relocation is emitted for the address field of that surface
+ *      state (ss8 for gen8_surface_state, ss6 for gen8_surface_state2).
+ */
+void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+                             struct i965_gpe_surface *gpe_surface,
+                             int index)
+{
+    struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
+    unsigned int ss_offset = gpe_context->surface_state_binding_table.surface_state_offset +
+                             index * SURFACE_STATE_PADDED_SIZE_GEN8;
+    unsigned int bt_entry_offset = gpe_context->surface_state_binding_table.binding_table_offset +
+                                   index * 4;
+    unsigned int tiling, swizzle;
+    unsigned int reloc_delta;     /* byte offset inside the surface's bo */
+    unsigned int reloc_location;  /* byte offset of the address field in the table bo */
+    char *table;
+
+    dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);
+
+    dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
+    table = (char *)gpe_context->surface_state_binding_table.bo->virtual;
+
+    /* Binding table entry -> surface state */
+    *((unsigned int *)(table + bt_entry_offset)) = ss_offset;
+
+    if (gpe_surface->is_2d_surface) {
+        struct gen8_surface_state *ss = (struct gen8_surface_state *)(table + ss_offset);
+        unsigned int width = gpe_resource->width;
+        unsigned int height = gpe_resource->height;
+        unsigned int pitch = gpe_resource->pitch;
+        unsigned int y_offset = 0;
+        unsigned int target_offset = 0;
+
+        if (gpe_surface->is_override_offset) {
+            /* Caller-supplied byte offset; y_offset stays 0 */
+            target_offset = gpe_surface->offset;
+        } else if (gpe_surface->is_uv_surface) {
+            unsigned int tile_alignment;
+
+            height /= 2;
+
+            switch (tiling) {
+            case I915_TILING_Y:
+                tile_alignment = 32;
+                break;
+
+            case I915_TILING_X:
+                tile_alignment = 8;
+                break;
+
+            default:
+                tile_alignment = 1;
+                break;
+            }
+
+            /*
+             * Split y_cb_offset into a multiple of tile_alignment rows
+             * (turned into a byte offset) plus the remaining rows.
+             */
+            y_offset = (gpe_resource->y_cb_offset % tile_alignment);
+            target_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;
+        }
+
+        if (gpe_surface->is_media_block_rw)
+            width = (ALIGN(width, 4) >> 2);
+
+        gen8_gpe_set_2d_surface_state(ss,
+                                      gpe_surface->vert_line_stride_offset,
+                                      gpe_surface->vert_line_stride,
+                                      gpe_surface->cacheability_control,
+                                      gpe_surface->format,
+                                      tiling,
+                                      width, height, pitch,
+                                      gpe_resource->bo->offset64 + target_offset,
+                                      y_offset);
+
+        reloc_delta = target_offset;
+        reloc_location = ss_offset + offsetof(struct gen8_surface_state, ss8);
+    } else if (gpe_surface->is_adv_surface) {
+        struct gen8_surface_state2 *ss = (struct gen8_surface_state2 *)(table + ss_offset);
+
+        gen8_gpe_set_adv_surface_state(ss,
+                                       gpe_surface->v_direction,
+                                       gpe_surface->cacheability_control,
+                                       MFX_SURFACE_PLANAR_420_8,
+                                       tiling,
+                                       gpe_resource->width,
+                                       gpe_resource->height,
+                                       gpe_resource->pitch,
+                                       gpe_resource->bo->offset64,
+                                       gpe_resource->y_cb_offset);
+
+        reloc_delta = 0;
+        reloc_location = ss_offset + offsetof(struct gen8_surface_state2, ss6);
+    } else {
+        struct gen8_surface_state *ss = (struct gen8_surface_state *)(table + ss_offset);
+        unsigned int format;
+        unsigned int pitch;
+
+        assert(gpe_surface->is_buffer);
+
+        /* Raw buffers use a pitch of 1, otherwise sizeof(unsigned int) */
+        if (gpe_surface->is_raw_buffer) {
+            format = I965_SURFACEFORMAT_RAW;
+            pitch = 1;
+        } else {
+            format = I965_SURFACEFORMAT_R32_UINT;
+            pitch = sizeof(unsigned int);
+        }
+
+        gen8_gpe_set_buffer2_surface_state(ss,
+                                           gpe_surface->cacheability_control,
+                                           format,
+                                           gpe_surface->size,
+                                           pitch,
+                                           gpe_resource->bo->offset64 + gpe_surface->offset);
+
+        reloc_delta = gpe_surface->offset;
+        reloc_location = ss_offset + offsetof(struct gen8_surface_state, ss8);
+    }
+
+    dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                      reloc_delta,
+                      reloc_location,
+                      gpe_resource->bo);
+
+    dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
+
+/*
+ * Emit a 4-dword MI_CONDITIONAL_BATCH_BUFFER_END command into batch:
+ * the header, param->compare_data, and a 64-bit relocation to
+ * param->bo at param->offset.
+ *
+ * ctx is not used by this function.
+ */
+void
+gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
+                                         struct intel_batchbuffer *batch,
+                                         struct gpe_mi_conditional_batch_buffer_end_parameter *param)
+{
+    /*
+     * Header: opcode, bit 21 set, length field (4 - 2).
+     * Always use PPGTT.
+     */
+    const unsigned int header = MI_CONDITIONAL_BATCH_BUFFER_END |
+                                (1 << 21) |
+                                (4 - 2);
+
+    __OUT_BATCH(batch, header);
+    __OUT_BATCH(batch, param->compare_data);
+
+    /* Address that is compared; no write domain is requested */
+    __OUT_RELOC64(batch,
+                  param->bo,
+                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  param->offset);
+}
+
+/*
+ * Emit a 5-dword MI_COPY_MEM_MEM command into batch: the header, a
+ * 64-bit relocation for the destination (param->dst_bo + dst_offset,
+ * requested with a render write domain) and a 64-bit relocation for
+ * the source (param->src_bo + src_offset, read only).
+ *
+ * ctx is not used by this function.
+ */
+void
+gen8_gpe_mi_copy_mem_mem(VADriverContextP ctx,
+                         struct intel_batchbuffer *batch,
+                         struct gpe_mi_copy_mem_parameter *param)
+{
+    /*
+     * Header: opcode, bits 22 and 21 left clear, length field (5 - 2).
+     * Always use PPGTT for src and dst.
+     */
+    const unsigned int header = MI_COPY_MEM_MEM |
+                                (0 << 22) |
+                                (0 << 21) |
+                                (5 - 2);
+
+    __OUT_BATCH(batch, header);
+
+    /* Destination first ... */
+    __OUT_RELOC64(batch,
+                  param->dst_bo,
+                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                  param->dst_offset);
+
+    /* ... then the source */
+    __OUT_RELOC64(batch,
+                  param->src_bo,
+                  I915_GEM_DOMAIN_RENDER, 0,
+                  param->src_offset);
+}
+
+/*
+ * Emit a 6-dword PIPE_CONTROL command into batch.
+ *
+ * Flag selection:
+ *   - param->flush_mode picks the cache bits
+ *       PIPE_CONTROL_FLUSH_WRITE_CACHE: WC flush + DC flush
+ *       PIPE_CONTROL_FLUSH_READ_CACHE:  SC/CC/VFC invalidation + IS flush
+ *       anything else:                  none
+ *   - with param->bo set, a qword post-sync write to param->bo at
+ *     param->offset is requested using the local PGTT flag;
+ *   - without param->bo, no post-sync write is requested, the global
+ *     GTT flag is used, and WC flush plus all invalidation bits are
+ *     added regardless of flush_mode.
+ *
+ * The last two dwords are param->dw0 and param->dw1.
+ * ctx is not used by this function.
+ */
+void
+gen8_gpe_pipe_control(VADriverContextP ctx,
+                      struct intel_batchbuffer *batch,
+                      struct gpe_pipe_control_parameter *param)
+{
+    int cache_flags = 0;
+    int post_sync_flags;
+    /*
+     * NOTE(review): this is 0 or 1, so it lands in bit 0 of the flags
+     * dword below. If a dedicated CS-stall bit constant was intended,
+     * this needs checking against the PIPE_CONTROL definition.
+     */
+    int cs_stall_enable = !param->disable_cs_stall;
+
+    switch (param->flush_mode) {
+    case PIPE_CONTROL_FLUSH_WRITE_CACHE:
+        cache_flags = CMD_PIPE_CONTROL_WC_FLUSH |
+                      CMD_PIPE_CONTROL_DC_FLUSH;
+        break;
+
+    case PIPE_CONTROL_FLUSH_READ_CACHE:
+        cache_flags = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8 |
+                      CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8 |
+                      CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8 |
+                      CMD_PIPE_CONTROL_IS_FLUSH;
+        break;
+
+    case PIPE_CONTROL_FLUSH_NONE:
+    default:
+        break;
+    }
+
+    if (param->bo) {
+        post_sync_flags = CMD_PIPE_CONTROL_WRITE_QWORD |
+                          CMD_PIPE_CONTROL_LOCAL_PGTT_GEN8;
+    } else {
+        post_sync_flags = CMD_PIPE_CONTROL_NOWRITE |
+                          CMD_PIPE_CONTROL_GLOBAL_GTT_GEN8;
+
+        /* No target buffer: flush and invalidate everything listed here */
+        cache_flags |= CMD_PIPE_CONTROL_WC_FLUSH |
+                       CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8 |
+                       CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8 |
+                       CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8 |
+                       CMD_PIPE_CONTROL_IS_FLUSH;
+    }
+
+    __OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2));
+    __OUT_BATCH(batch, (cache_flags |
+                        post_sync_flags |
+                        cs_stall_enable |
+                        CMD_PIPE_CONTROL_FLUSH_ENABLE));
+
+    /* Address dwords: a 64-bit relocation, or two zero dwords */
+    if (param->bo) {
+        __OUT_RELOC64(batch,
+                      param->bo,
+                      I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_RENDER,
+                      param->offset);
+    } else {
+        __OUT_BATCH(batch, 0);
+        __OUT_BATCH(batch, 0);
+    }
+
+    __OUT_BATCH(batch, param->dw0);
+    __OUT_BATCH(batch, param->dw1);
+}
+
+/*
+ * Translate the encoder-level walker description (kernel_walker_param)
+ * into a fully populated media object walker parameter block
+ * (walker_param).
+ *
+ * walker_param is zeroed first.  The global loop defaults to a single
+ * block covering resolution_x by resolution_y; the local loop pattern is
+ * then chosen, in priority order, by:
+ *   1. no_dependency            -> raster scan, scoreboard mask 0
+ *   2. use_vertical_raster_scan -> vertical raster scan, scoreboard off
+ *   3. walker_degree            -> 45z, 45, 26z, or (fallback) 26 degree
+ */
+void
+i965_init_media_object_walker_parameter(struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
+                                        struct gpe_media_object_walker_parameter *walker_param)
+{
+    memset(walker_param, 0, sizeof(*walker_param));
+
+    walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
+
+    /* Defaults: one block spanning the whole resolution */
+    walker_param->block_resolution.x = kernel_walker_param->resolution_x;
+    walker_param->block_resolution.y = kernel_walker_param->resolution_y;
+
+    walker_param->global_resolution.x = kernel_walker_param->resolution_x;
+    walker_param->global_resolution.y = kernel_walker_param->resolution_y;
+
+    walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
+    walker_param->global_outer_loop_stride.y = 0;
+
+    walker_param->global_inner_loop_unit.x = 0;
+    walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
+
+    walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
+    walker_param->global_loop_exec_count = 0xFFFF;  //MAX VALUE
+
+    if (kernel_walker_param->no_dependency) {
+        // Raster scan walking pattern
+        walker_param->scoreboard_mask = 0;
+        walker_param->local_outer_loop_stride.x = 0;
+        walker_param->local_outer_loop_stride.y = 1;
+        walker_param->local_inner_loop_unit.x = 1;
+        walker_param->local_inner_loop_unit.y = 0;
+        walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
+        walker_param->local_end.y = 0;
+        return;
+    }
+
+    if (kernel_walker_param->use_vertical_raster_scan) {
+        // Raster scan walking pattern; the scoreboard is switched off
+        walker_param->scoreboard_mask = 0x1;
+        walker_param->use_scoreboard = 0;
+        walker_param->local_outer_loop_stride.x = 1;
+        walker_param->local_outer_loop_stride.y = 0;
+        walker_param->local_inner_loop_unit.x = 0;
+        walker_param->local_inner_loop_unit.y = 1;
+        walker_param->local_end.x = 0;
+        walker_param->local_end.y = kernel_walker_param->resolution_y - 1;
+        return;
+    }
+
+    /* All remaining patterns leave local_end at the origin */
+    walker_param->local_end.x = 0;
+    walker_param->local_end.y = 0;
+
+    if (kernel_walker_param->walker_degree == WALKER_45Z_DEGREE) {
+        // 45z degree vp9
+        walker_param->scoreboard_mask = 0x0F;
+
+        walker_param->global_loop_exec_count = 0x3FF;
+        walker_param->local_loop_exec_count = 0x3FF;
+
+        /* Global resolution: (resolution_x / 2, truncated) + 1 wide, twice resolution_y tall */
+        walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
+        walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
+
+        walker_param->global_start.x = 0;
+        walker_param->global_start.y = 0;
+
+        /* Strides and block size follow the global resolution just computed */
+        walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
+        walker_param->global_outer_loop_stride.y = 0;
+
+        walker_param->global_inner_loop_unit.x = 0;
+        walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
+
+        walker_param->block_resolution.x = walker_param->global_resolution.x;
+        walker_param->block_resolution.y = walker_param->global_resolution.y;
+
+        walker_param->local_start.x = 0;
+        walker_param->local_start.y = 0;
+
+        walker_param->local_outer_loop_stride.x = 1;
+        walker_param->local_outer_loop_stride.y = 0;
+
+        walker_param->local_inner_loop_unit.x = -1;
+        walker_param->local_inner_loop_unit.y = 4;
+
+        walker_param->middle_loop_extra_steps = 3;
+        walker_param->mid_loop_unit_x = 0;
+        walker_param->mid_loop_unit_y = 1;
+        return;
+    }
+
+    if (kernel_walker_param->walker_degree == WALKER_45_DEGREE) {
+        // 45 order in local loop
+        walker_param->scoreboard_mask = 0x03;
+        walker_param->local_outer_loop_stride.x = 1;
+        walker_param->local_outer_loop_stride.y = 0;
+        walker_param->local_inner_loop_unit.x = -1;
+        walker_param->local_inner_loop_unit.y = 1;
+        return;
+    }
+
+    if (kernel_walker_param->walker_degree == WALKER_26Z_DEGREE) {
+        // 26z HEVC
+        walker_param->scoreboard_mask = 0x7f;
+
+        // z order in local loop
+        walker_param->local_outer_loop_stride.x = 0;
+        walker_param->local_outer_loop_stride.y = 1;
+        walker_param->local_inner_loop_unit.x = 1;
+        walker_param->local_inner_loop_unit.y = 0;
+
+        /* 2x2 blocks */
+        walker_param->block_resolution.x = 2;
+        walker_param->block_resolution.y = 2;
+
+        walker_param->global_outer_loop_stride.x = 2;
+        walker_param->global_outer_loop_stride.y = 0;
+
+        walker_param->global_inner_loop_unit.x = 0xFFF - 4 + 1;
+        walker_param->global_inner_loop_unit.y = 2;
+        return;
+    }
+
+    // 26 degree (any other walker_degree value)
+    walker_param->scoreboard_mask = 0x0F;
+    walker_param->local_outer_loop_stride.x = 1;
+    walker_param->local_outer_loop_stride.y = 0;
+    walker_param->local_inner_loop_unit.x = -2;
+    walker_param->local_inner_loop_unit.y = 1;
+}
+
+/*
+ * Bind obj_surface as a 2D surface in slot `index` of gpe_context.
+ *
+ * A temporary gpe resource is derived from obj_surface, handed to the
+ * driver's context_add_surface hook, and released again before
+ * returning.  is_uv_surface and is_media_block_rw are normalised with
+ * `!!` before being stored in the surface description.
+ */
+void
+i965_add_2d_gpe_surface(VADriverContextP ctx,
+                        struct i965_gpe_context *gpe_context,
+                        struct object_surface *obj_surface,
+                        int is_uv_surface,
+                        int is_media_block_rw,
+                        unsigned int format,
+                        int index)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_gpe_resource surface_res;
+    struct i965_gpe_surface desc;
+
+    memset(&desc, 0, sizeof(desc));
+
+    i965_object_surface_to_2d_gpe_resource(&surface_res, obj_surface);
+
+    desc.gpe_resource = &surface_res;
+    desc.format = format;
+    desc.cacheability_control = i965->intel.mocs_state;
+    desc.is_2d_surface = 1;
+    desc.is_uv_surface = !!is_uv_surface;
+    desc.is_media_block_rw = !!is_media_block_rw;
+
+    /* P010 surfaces accessed through media block read/write are flagged as 16bpp */
+    if (desc.is_media_block_rw && obj_surface->fourcc == VA_FOURCC_P010)
+        desc.is_16bpp = 1;
+
+    i965->gpe_table.context_add_surface(gpe_context, &desc, index);
+
+    i965_free_gpe_resource(&surface_res);
+}
+
+/*
+ * Bind obj_surface as an "advanced" surface in slot `index` of
+ * gpe_context.
+ *
+ * A temporary gpe resource is derived from obj_surface, handed to the
+ * driver's context_add_surface hook, and released again before
+ * returning.  v_direction is always set to 2.
+ */
+void
+i965_add_adv_gpe_surface(VADriverContextP ctx,
+                         struct i965_gpe_context *gpe_context,
+                         struct object_surface *obj_surface,
+                         int index)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_gpe_resource surface_res;
+    struct i965_gpe_surface desc;
+
+    memset(&desc, 0, sizeof(desc));
+
+    i965_object_surface_to_2d_gpe_resource(&surface_res, obj_surface);
+
+    desc.gpe_resource = &surface_res;
+    desc.v_direction = 2;
+    desc.cacheability_control = i965->intel.mocs_state;
+    desc.is_adv_surface = 1;
+
+    i965->gpe_table.context_add_surface(gpe_context, &desc, index);
+
+    i965_free_gpe_resource(&surface_res);
+}
+
+/*
+ * Bind `size` bytes of gpe_buffer, starting at byte `offset`, as a
+ * buffer surface in slot `index` of gpe_context.
+ *
+ * gpe_buffer is referenced, not copied or released, by this function.
+ * is_raw_buffer is normalised with `!!` before being stored.
+ */
+void
+i965_add_buffer_gpe_surface(VADriverContextP ctx,
+                            struct i965_gpe_context *gpe_context,
+                            struct i965_gpe_resource *gpe_buffer,
+                            int is_raw_buffer,
+                            unsigned int size,
+                            unsigned int offset,
+                            int index)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_gpe_surface desc;
+
+    memset(&desc, 0, sizeof(desc));
+
+    desc.gpe_resource = gpe_buffer;
+    desc.offset = offset;
+    desc.size = size;
+    desc.cacheability_control = i965->intel.mocs_state;
+    desc.is_buffer = 1;
+    desc.is_raw_buffer = !!is_raw_buffer;
+
+    i965->gpe_table.context_add_surface(gpe_context, &desc, index);
+}
+
+/*
+ * Bind gpe_buffer as a 2D surface with the given format in slot `index`
+ * of gpe_context.
+ *
+ * gpe_buffer is referenced, not copied or released, by this function.
+ * is_media_block_rw is normalised with `!!` before being stored.
+ */
+void
+i965_add_buffer_2d_gpe_surface(VADriverContextP ctx,
+                               struct i965_gpe_context *gpe_context,
+                               struct i965_gpe_resource *gpe_buffer,
+                               int is_media_block_rw,
+                               unsigned int format,
+                               int index)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_gpe_surface desc;
+
+    memset(&desc, 0, sizeof(desc));
+
+    desc.gpe_resource = gpe_buffer;
+    desc.format = format;
+    desc.cacheability_control = i965->intel.mocs_state;
+    desc.is_2d_surface = 1;
+    desc.is_media_block_rw = !!is_media_block_rw;
+
+    i965->gpe_table.context_add_surface(gpe_context, &desc, index);
+}
+
+/*
+ * Convenience wrapper around i965_add_buffer_gpe_surface() for callers
+ * that hold a plain dri_bo.
+ *
+ * bo is wrapped in a temporary gpe resource, bound via
+ * i965_add_buffer_gpe_surface() with the remaining arguments passed
+ * through unchanged, and the temporary resource is then freed.
+ */
+void
+gen9_add_dri_buffer_gpe_surface(VADriverContextP ctx,
+                                struct i965_gpe_context *gpe_context,
+                                dri_bo *bo,
+                                int is_raw_buffer,
+                                unsigned int size,
+                                unsigned int offset,
+                                int index)
+{
+    struct i965_gpe_resource wrapped_bo;
+
+    i965_dri_object_to_buffer_gpe_resource(&wrapped_bo, bo);
+
+    i965_add_buffer_gpe_surface(ctx, gpe_context, &wrapped_bo,
+                                is_raw_buffer, size, offset, index);
+
+    i965_free_gpe_resource(&wrapped_bo);
+}
+
+/*
+ * Populate the driver's GPE function table (i965->gpe_table) for the
+ * detected hardware generation.
+ *
+ * Gen8 and Gen9/Gen10 share most entry points; only the surface,
+ * binding-table, pipeline setup/end and conditional batch-buffer-end
+ * hooks differ.  On any other generation the table is left untouched.
+ *
+ * Always returns true.
+ */
+bool
+i965_gpe_table_init(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_gpe_table *gpe = &i965->gpe_table;
+
+    /* Generation-specific entry points */
+    if (IS_GEN8(i965->intel.device_info)) {
+        gpe->context_add_surface = gen8_gpe_context_add_surface;
+        gpe->reset_binding_table = gen8_gpe_reset_binding_table;
+        gpe->pipeline_end = gen8_gpe_pipeline_end;
+        gpe->pipeline_setup = gen8_gpe_pipeline_setup;
+        gpe->mi_conditional_batch_buffer_end = gen8_gpe_mi_conditional_batch_buffer_end;
+    } else if (IS_GEN9(i965->intel.device_info) ||
+               IS_GEN10(i965->intel.device_info)) {
+        gpe->context_add_surface = gen9_gpe_context_add_surface;
+        gpe->reset_binding_table = gen9_gpe_reset_binding_table;
+        gpe->pipeline_end = gen9_gpe_pipeline_end;
+        gpe->pipeline_setup = gen9_gpe_pipeline_setup;
+        gpe->mi_conditional_batch_buffer_end = gen9_gpe_mi_conditional_batch_buffer_end;
+    } else {
+        // TODO: for other platforms
+        return true;
+    }
+
+    /* Entry points shared by Gen8, Gen9 and Gen10 */
+    gpe->context_init = gen8_gpe_context_init;
+    gpe->context_destroy = gen8_gpe_context_destroy;
+    gpe->load_kernels = gen8_gpe_load_kernels;
+    gpe->setup_interface_data = gen8_gpe_setup_interface_data;
+    gpe->set_dynamic_buffer = gen8_gpe_context_set_dynamic_buffer;
+    gpe->media_object = gen8_gpe_media_object;
+    gpe->media_object_walker = gen8_gpe_media_object_walker;
+    gpe->media_state_flush = gen8_gpe_media_state_flush;
+    gpe->pipe_control = gen8_gpe_pipe_control;
+    gpe->mi_batch_buffer_start = gen8_gpe_mi_batch_buffer_start;
+    gpe->mi_load_register_reg = gen8_gpe_mi_load_register_reg;
+    gpe->mi_load_register_imm = gen8_gpe_mi_load_register_imm;
+    gpe->mi_load_register_mem = gen8_gpe_mi_load_register_mem;
+    gpe->mi_store_register_mem = gen8_gpe_mi_store_register_mem;
+    gpe->mi_store_data_imm = gen8_gpe_mi_store_data_imm;
+    gpe->mi_flush_dw = gen8_gpe_mi_flush_dw;
+    gpe->mi_copy_mem_mem = gen8_gpe_mi_copy_mem_mem;
+
+    return true;
+}
+
+/*
+ * Counterpart of i965_gpe_table_init().  Currently a no-op:
+ * i965_gpe_table_init() only stores function pointers, so there is
+ * nothing to release here.  ctx is unused.
+ */
+void
+i965_gpe_table_terminate(VADriverContextP ctx)
+{
+
+}