X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=src%2Fgen75_vpp_gpe.c;h=ad893e83fb6cb36c8b115f3ca693f4e51a3a3039;hb=6907a27b6ed1eaa7acd2b3bd0930bc81501ab4f6;hp=52a0e2ffeea86a0e3407466646af18812243dec5;hpb=b5d6d9d425a6d539b27d22992bda05f79d1a0622;p=android-x86%2Fhardware-intel-common-vaapi.git

diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
index 52a0e2f..ad893e8 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -47,53 +47,53 @@
 #define SURFACE_STATE_OFFSET_GEN8(index) (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
 #define BINDING_TABLE_OFFSET_GEN8(index) (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
 
-#define CURBE_ALLOCATION_SIZE 37
-#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
-#define CURBE_URB_ENTRY_LENGTH 4
+#define CURBE_ALLOCATION_SIZE   37
+#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
+#define CURBE_URB_ENTRY_LENGTH  4
 
 /* Shaders information for sharpening */
 static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
-   #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
+#include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
 };
 static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
-   #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
+#include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
 };
 static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
-   #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
+#include "shaders/post_processing/gen75/sharpening_unmask.g75b"
 };
 
 static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
     {
         "vpp: sharpening(horizontal blur)",
         VPP_GPE_SHARPENING,
-        gen75_gpe_sharpening_h_blur,
-        sizeof(gen75_gpe_sharpening_h_blur),
+        gen75_gpe_sharpening_h_blur,
+        sizeof(gen75_gpe_sharpening_h_blur),
         NULL
     },
     {
         "vpp: sharpening(vertical blur)",
         VPP_GPE_SHARPENING,
-        gen75_gpe_sharpening_v_blur,
-        sizeof(gen75_gpe_sharpening_v_blur),
+        gen75_gpe_sharpening_v_blur,
+        sizeof(gen75_gpe_sharpening_v_blur),
         NULL
     },
     {
         "vpp: sharpening(unmask)",
         VPP_GPE_SHARPENING,
-        gen75_gpe_sharpening_unmask,
-        sizeof(gen75_gpe_sharpening_unmask),
+        gen75_gpe_sharpening_unmask,
+        sizeof(gen75_gpe_sharpening_unmask),
         NULL
     },
-}; 
+};
 
 /* sharpening kernels for Broadwell */
 static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
-   #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
+#include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
 };
 static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
-   #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
+#include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
 };
 static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
-   #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
+#include "shaders/post_processing/gen8/sharpening_unmask.g8b"
 };
 
 static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
@@ -122,28 +122,30 @@ static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
 
 static VAStatus
 gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                                 struct vpp_gpe_context *vpp_gpe_ctx)
 {
     struct object_surface *obj_surface;
     unsigned int i = 0;
     unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
-                                         vpp_gpe_ctx->backward_surf_sum) * 2;
+                                       vpp_gpe_ctx->backward_surf_sum) * 2;
 
     /* Binding input NV12 surfaces (Luma + Chroma)*/
-    for( i = 0; i < input_surface_sum; i += 2){
-         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
-         assert(obj_surface);
-         gen7_gpe_media_rw_surface_setup(ctx,
-                                         &vpp_gpe_ctx->gpe_ctx,
-                                         obj_surface,
-                                         BINDING_TABLE_OFFSET_GEN7(i),
-                                         SURFACE_STATE_OFFSET_GEN7(i));
-
-         gen75_gpe_media_chroma_surface_setup(ctx,
-                                         &vpp_gpe_ctx->gpe_ctx,
-                                         obj_surface,
-                                         BINDING_TABLE_OFFSET_GEN7(i + 1),
-                                         SURFACE_STATE_OFFSET_GEN7(i + 1));
+    for (i = 0; i < input_surface_sum; i += 2) {
+        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
+        assert(obj_surface);
+        gen7_gpe_media_rw_surface_setup(ctx,
+                                        &vpp_gpe_ctx->gpe_ctx,
+                                        obj_surface,
+                                        BINDING_TABLE_OFFSET_GEN7(i),
+                                        SURFACE_STATE_OFFSET_GEN7(i),
+                                        0);
+
+        gen75_gpe_media_chroma_surface_setup(ctx,
+                                             &vpp_gpe_ctx->gpe_ctx,
+                                             obj_surface,
+                                             BINDING_TABLE_OFFSET_GEN7(i + 1),
+                                             SURFACE_STATE_OFFSET_GEN7(i + 1),
+                                             0);
     }
 
     /* Binding output NV12 surface(Luma + Chroma) */
@@ -153,36 +155,38 @@ gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                                     &vpp_gpe_ctx->gpe_ctx,
                                     obj_surface,
                                     BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
-                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
+                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum),
+                                    1);
 
     gen75_gpe_media_chroma_surface_setup(ctx,
-                                    &vpp_gpe_ctx->gpe_ctx,
-                                    obj_surface,
-                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
-                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
+                                         &vpp_gpe_ctx->gpe_ctx,
+                                         obj_surface,
+                                         BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
+                                         SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1),
+                                         1);
 
     /* Bind kernel return buffer surface */
     gen7_gpe_buffer_suface_setup(ctx,
-                                  &vpp_gpe_ctx->gpe_ctx,
-                                  &vpp_gpe_ctx->vpp_kernel_return,
-                                  BINDING_TABLE_OFFSET_GEN7((input_surface_sum + 2)),
-                                  SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));
+                                 &vpp_gpe_ctx->gpe_ctx,
+                                 &vpp_gpe_ctx->vpp_kernel_return,
+                                 BINDING_TABLE_OFFSET_GEN7((input_surface_sum + 2)),
+                                 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));
 
     return VA_STATUS_SUCCESS;
 }
 
 static VAStatus
 gen75_gpe_process_interface_setup(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                                  struct vpp_gpe_context *vpp_gpe_ctx)
 {
-    struct gen6_interface_descriptor_data *desc;   
+    struct gen6_interface_descriptor_data *desc;
     dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
-    int i; 
+    int i;
 
     dri_bo_map(bo, 1);
     assert(bo->virtual);
     desc = bo->virtual;
-    
+
     /*Setup the descritor table*/
-    for(i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
+    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
         struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
         assert(sizeof(*desc) == 32);
         memset(desc, 0, sizeof(*desc));
@@ -194,10 +198,10 @@ gen75_gpe_process_interface_setup(VADriverContextP ctx,
         desc->desc4.constant_urb_entry_read_offset = 0;
         desc->desc4.constant_urb_entry_read_length = 0;
 
-        dri_bo_emit_reloc(bo,	
+        dri_bo_emit_reloc(bo,
                           I915_GEM_DOMAIN_INSTRUCTION, 0,
                           0,
-                          i* sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
+                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                           kernel->bo);
         desc++;
     }
@@ -207,23 +211,9 @@ gen75_gpe_process_interface_setup(VADriverContextP ctx,
     return VA_STATUS_SUCCESS;
 }
 
-static VAStatus
-gen75_gpe_process_constant_fill(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
-{
-    dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
-    assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
-    unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
-    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
-           vpp_gpe_ctx->kernel_param_size);
-    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);
-
-    return VA_STATUS_SUCCESS;
-}
-
-static VAStatus 
+static VAStatus
 gen75_gpe_process_parameters_fill(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                                  struct vpp_gpe_context *vpp_gpe_ctx)
 {
     unsigned int *command_ptr;
     unsigned int i, size = vpp_gpe_ctx->thread_param_size;
@@ -233,20 +223,19 @@ gen75_gpe_process_parameters_fill(VADriverContextP ctx,
     dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
     command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;
 
-    for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
-    {
-        *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
-        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
-        *command_ptr++ = 0;
-        *command_ptr++ = 0;
-        *command_ptr++ = 0;
-        *command_ptr++ = 0;
-
-        /* copy thread inline data */
-        position =(unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
-        memcpy(command_ptr, position, size);
-        command_ptr += size/sizeof(int);
-    }
+    for (i = 0; i < vpp_gpe_ctx->thread_num; i ++) {
+        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
+        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
+        *command_ptr++ = 0;
+        *command_ptr++ = 0;
+        *command_ptr++ = 0;
+        *command_ptr++ = 0;
+
+        /* copy thread inline data */
+        position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
+        memcpy(command_ptr, position, size);
+        command_ptr += size / sizeof(int);
+    }
 
     *command_ptr++ = 0;
     *command_ptr++ = MI_BATCH_BUFFER_END;
@@ -258,54 +247,54 @@ static VAStatus
 
 static VAStatus
 gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                                 struct vpp_gpe_context *vpp_gpe_ctx)
 {
     intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
 
     intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);
 
     gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);
-   
+
     gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);
-   
+
     BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
     OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
     OUT_RELOC(vpp_gpe_ctx->batch,
               vpp_gpe_ctx->vpp_batchbuffer.bo,
-              I915_GEM_DOMAIN_COMMAND, 0, 
+              I915_GEM_DOMAIN_COMMAND, 0,
               0);
     ADVANCE_BATCH(vpp_gpe_ctx->batch);
 
     intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);
-    
+
     return VA_STATUS_SUCCESS;
 }
 
 static VAStatus
 gen75_gpe_process_init(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                       struct vpp_gpe_context *vpp_gpe_ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     dri_bo *bo;
 
-    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num * 
-        (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;
+    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
+                                  (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;
 
     vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
     vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
     vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
-    unsigned int kernel_return_size =  vpp_gpe_ctx->vpp_kernel_return.num_blocks
-          * vpp_gpe_ctx->vpp_kernel_return.size_block;
- 
+
+    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
+                                      * vpp_gpe_ctx->vpp_kernel_return.size_block;
+
     dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
                       "vpp batch buffer",
-                       batch_buf_size, 0x1000);
+                      batch_buf_size, 0x1000);
     vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
 
     dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                       "vpp kernel return buffer",
-                       kernel_return_size, 0x1000);
+                      kernel_return_size, 0x1000);
     vpp_gpe_ctx->vpp_kernel_return.bo = bo;
 
     vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);
@@ -315,7 +304,7 @@ gen75_gpe_process_init(VADriverContextP ctx,
 
 static VAStatus
 gen75_gpe_process_prepare(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                          struct vpp_gpe_context *vpp_gpe_ctx)
 {
     /*Setup all the memory object*/
     gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
@@ -324,16 +313,16 @@ gen75_gpe_process_prepare(VADriverContextP ctx,
 
     /*Programing media pipeline*/
     gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);
-        
+
     return VA_STATUS_SUCCESS;
 }
 
 static VAStatus
 gen75_gpe_process_run(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                      struct vpp_gpe_context *vpp_gpe_ctx)
 {
     intel_batchbuffer_flush(vpp_gpe_ctx->batch);
-    
+
     return VA_STATUS_SUCCESS;
 }
 
@@ -360,28 +349,30 @@ gen75_gpe_process(VADriverContextP ctx,
 
 static VAStatus
 gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                                struct vpp_gpe_context *vpp_gpe_ctx)
 {
     struct object_surface *obj_surface;
     unsigned int i = 0;
     unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
-                                         vpp_gpe_ctx->backward_surf_sum) * 2;
+                                       vpp_gpe_ctx->backward_surf_sum) * 2;
 
     /* Binding input NV12 surfaces (Luma + Chroma)*/
-    for( i = 0; i < input_surface_sum; i += 2){
-         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
-         assert(obj_surface);
-         gen8_gpe_media_rw_surface_setup(ctx,
-                                         &vpp_gpe_ctx->gpe_ctx,
-                                         obj_surface,
-                                         BINDING_TABLE_OFFSET_GEN8(i),
-                                         SURFACE_STATE_OFFSET_GEN8(i));
-
-         gen8_gpe_media_chroma_surface_setup(ctx,
-                                         &vpp_gpe_ctx->gpe_ctx,
-                                         obj_surface,
-                                         BINDING_TABLE_OFFSET_GEN8(i + 1),
-                                         SURFACE_STATE_OFFSET_GEN8(i + 1));
+    for (i = 0; i < input_surface_sum; i += 2) {
+        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
+        assert(obj_surface);
+        gen8_gpe_media_rw_surface_setup(ctx,
+                                        &vpp_gpe_ctx->gpe_ctx,
+                                        obj_surface,
+                                        BINDING_TABLE_OFFSET_GEN8(i),
+                                        SURFACE_STATE_OFFSET_GEN8(i),
+                                        0);
+
+        gen8_gpe_media_chroma_surface_setup(ctx,
+                                            &vpp_gpe_ctx->gpe_ctx,
+                                            obj_surface,
+                                            BINDING_TABLE_OFFSET_GEN8(i + 1),
+                                            SURFACE_STATE_OFFSET_GEN8(i + 1),
+                                            0);
     }
 
     /* Binding output NV12 surface(Luma + Chroma) */
@@ -391,51 +382,53 @@ gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                                     &vpp_gpe_ctx->gpe_ctx,
                                     obj_surface,
                                     BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
-                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
+                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum),
+                                    1);
 
     gen8_gpe_media_chroma_surface_setup(ctx,
-                                    &vpp_gpe_ctx->gpe_ctx,
-                                    obj_surface,
-                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
-                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
+                                        &vpp_gpe_ctx->gpe_ctx,
+                                        obj_surface,
+                                        BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
+                                        SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1),
+                                        1);
 
     /* Bind kernel return buffer surface */
     gen7_gpe_buffer_suface_setup(ctx,
-                                  &vpp_gpe_ctx->gpe_ctx,
-                                  &vpp_gpe_ctx->vpp_kernel_return,
-                                  BINDING_TABLE_OFFSET_GEN8((input_surface_sum + 2)),
-                                  SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));
+                                 &vpp_gpe_ctx->gpe_ctx,
+                                 &vpp_gpe_ctx->vpp_kernel_return,
+                                 BINDING_TABLE_OFFSET_GEN8((input_surface_sum + 2)),
+                                 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));
 
     return VA_STATUS_SUCCESS;
 }
 
 static VAStatus
 gen8_gpe_process_interface_setup(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                                 struct vpp_gpe_context *vpp_gpe_ctx)
 {
     struct gen8_interface_descriptor_data *desc;
-    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
+    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
     int i;
 
     dri_bo_map(bo, 1);
     assert(bo->virtual);
     desc = (struct gen8_interface_descriptor_data *)(bo->virtual
-                           + vpp_gpe_ctx->gpe_ctx.idrt_offset);
+                                                     + vpp_gpe_ctx->gpe_ctx.idrt.offset);
 
     /*Setup the descritor table*/
-    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
+    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
         struct i965_kernel *kernel;
         kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
         assert(sizeof(*desc) == 32);
         /*Setup the descritor table*/
-         memset(desc, 0, sizeof(*desc));
-         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
-         desc->desc3.sampler_count = 0; /* FIXME: */
-         desc->desc3.sampler_state_pointer = 0;
-         desc->desc4.binding_table_entry_count = 6; /* FIXME: */
-         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
-         desc->desc5.constant_urb_entry_read_offset = 0;
-         desc->desc5.constant_urb_entry_read_length = 0;
-
-         desc++;
+        memset(desc, 0, sizeof(*desc));
+        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
+        desc->desc3.sampler_count = 0; /* FIXME: */
+        desc->desc3.sampler_state_pointer = 0;
+        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
+        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
+        desc->desc5.constant_urb_entry_read_offset = 0;
+        desc->desc5.constant_urb_entry_read_length = 0;
+
+        desc++;
     }
 
     dri_bo_unmap(bo);
@@ -444,22 +437,8 @@ gen8_gpe_process_interface_setup(VADriverContextP ctx,
 }
 
 static VAStatus
-gen8_gpe_process_constant_fill(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
-{
-    dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1);
-    assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual);
-    unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual;
-    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
-           vpp_gpe_ctx->kernel_param_size);
-    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo);
-
-    return VA_STATUS_SUCCESS;
-}
-
-static VAStatus
 gen8_gpe_process_parameters_fill(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                                 struct vpp_gpe_context *vpp_gpe_ctx)
 {
     unsigned int *command_ptr;
     unsigned int i, size = vpp_gpe_ctx->thread_param_size;
@@ -469,22 +448,21 @@ gen8_gpe_process_parameters_fill(VADriverContextP ctx,
     dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
     command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;
 
-    for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
-    {
-        *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
-        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
-        *command_ptr++ = 0;
-        *command_ptr++ = 0;
-        *command_ptr++ = 0;
-        *command_ptr++ = 0;
-
-        /* copy thread inline data */
-        position =(unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
-        memcpy(command_ptr, position, size);
-        command_ptr += size/sizeof(int);
-
-        *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
-        *command_ptr++ = 0;
+    for (i = 0; i < vpp_gpe_ctx->thread_num; i ++) {
+        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
+        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
+        *command_ptr++ = 0;
+        *command_ptr++ = 0;
+        *command_ptr++ = 0;
+        *command_ptr++ = 0;
+
+        /* copy thread inline data */
+        position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
+        memcpy(command_ptr, position, size);
+        command_ptr += size / sizeof(int);
+
+        *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+        *command_ptr++ = 0;
     }
 
     *command_ptr++ = 0;
@@ -497,7 +475,7 @@ gen8_gpe_process_parameters_fill(VADriverContextP ctx,
 
 static VAStatus
 gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                                struct vpp_gpe_context *vpp_gpe_ctx)
 {
     intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
     intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);
@@ -523,31 +501,31 @@ gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
 
 static VAStatus
 gen8_gpe_process_init(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                      struct vpp_gpe_context *vpp_gpe_ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     dri_bo *bo;
 
     unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
-        (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;
+                                  (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;
 
     vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
     vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
     vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
 
     unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
-          * vpp_gpe_ctx->vpp_kernel_return.size_block;
+                                      * vpp_gpe_ctx->vpp_kernel_return.size_block;
 
     dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
                       "vpp batch buffer",
-                       batch_buf_size, 0x1000);
+                      batch_buf_size, 0x1000);
     vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
 
     dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
                       "vpp kernel return buffer",
-                       kernel_return_size, 0x1000);
+                      kernel_return_size, 0x1000);
     vpp_gpe_ctx->vpp_kernel_return.bo = bo;
 
     vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);
@@ -557,7 +535,7 @@ gen8_gpe_process_init(VADriverContextP ctx,
 
 static VAStatus
 gen8_gpe_process_prepare(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                         struct vpp_gpe_context *vpp_gpe_ctx)
 {
     /*Setup all the memory object*/
     gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
@@ -572,7 +550,7 @@ gen8_gpe_process_prepare(VADriverContextP ctx,
 
 static VAStatus
 gen8_gpe_process_run(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                     struct vpp_gpe_context *vpp_gpe_ctx)
 {
     intel_batchbuffer_flush(vpp_gpe_ctx->batch);
 
@@ -581,7 +559,7 @@ gen8_gpe_process_run(VADriverContextP ctx,
 
 static VAStatus
 gen8_gpe_process(VADriverContextP ctx,
-                   struct vpp_gpe_context * vpp_gpe_ctx)
+                 struct vpp_gpe_context * vpp_gpe_ctx)
 {
     VAStatus va_status = VA_STATUS_SUCCESS;
 
@@ -602,80 +580,82 @@ gen8_gpe_process(VADriverContextP ctx,
 
 static VAStatus
 vpp_gpe_process(VADriverContextP ctx,
-                   struct vpp_gpe_context * vpp_gpe_ctx)
+                struct vpp_gpe_context * vpp_gpe_ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     if (IS_HASWELL(i965->intel.device_info))
-       return gen75_gpe_process(ctx, vpp_gpe_ctx);
+        return gen75_gpe_process(ctx, vpp_gpe_ctx);
     else if (IS_GEN8(i965->intel.device_info) ||
              IS_GEN9(i965->intel.device_info))
-       return gen8_gpe_process(ctx, vpp_gpe_ctx);
+        return gen8_gpe_process(ctx, vpp_gpe_ctx);
 
-    return VA_STATUS_ERROR_UNIMPLEMENTED; 
+    return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
 static VAStatus
 vpp_gpe_process_sharpening(VADriverContextP ctx,
-                   struct vpp_gpe_context * vpp_gpe_ctx)
+                           struct vpp_gpe_context * vpp_gpe_ctx)
 {
-     VAStatus va_status = VA_STATUS_SUCCESS;
-     struct i965_driver_data *i965 = i965_driver_data(ctx);
-     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
-     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;
-
-     VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
-     VABufferID *filter_ids = (VABufferID*)pipe->filters ;
-     struct object_buffer *obj_buf = BUFFER((*(filter_ids + 0)));
-
-     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);
-
-     if (!obj_buf ||
-         !obj_buf->buffer_store ||
-         !obj_buf->buffer_store->buffer)
-         goto error;
-
-     VAProcFilterParameterBuffer* filter =
-         (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;
-     float sharpening_intensity = filter->value;
-
-     ThreadParameterSharpening thr_param;
-     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
-     unsigned int i;
-     unsigned char * pos;
-
-     if(vpp_gpe_ctx->is_first_frame){
-         vpp_gpe_ctx->sub_shader_sum = 3;
-         struct i965_kernel * vpp_kernels;
-         if (IS_HASWELL(i965->intel.device_info))
-             vpp_kernels = gen75_vpp_sharpening_kernels;
-         else if (IS_GEN8(i965->intel.device_info) ||
-                  IS_GEN9(i965->intel.device_info)) // TODO: build the sharpening kernel for GEN9
-             vpp_kernels = gen8_vpp_sharpening_kernels;
-
-         vpp_gpe_ctx->gpe_load_kernels(ctx,
-                               &vpp_gpe_ctx->gpe_ctx,
-                               vpp_kernels,
-                               vpp_gpe_ctx->sub_shader_sum);
-     }
-
-     if(vpp_gpe_ctx->surface_tmp == VA_INVALID_ID){
+    VAStatus va_status = VA_STATUS_SUCCESS;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
+    struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;
+
+    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
+    VABufferID *filter_ids = (VABufferID*)pipe->filters ;
+    struct object_buffer *obj_buf = BUFFER((*(filter_ids + 0)));
+
+    assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);
+
+    if (!obj_buf ||
+        !obj_buf->buffer_store ||
+        !obj_buf->buffer_store->buffer)
+        goto error;
+
+    VAProcFilterParameterBuffer* filter =
+        (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;
+    float sharpening_intensity = filter->value;
+
+    ThreadParameterSharpening thr_param;
+    unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
+    unsigned int i;
+    unsigned char * pos;
+
+    if (vpp_gpe_ctx->is_first_frame) {
+        vpp_gpe_ctx->sub_shader_sum = 3;
+        struct i965_kernel * vpp_kernels;
+        if (IS_HASWELL(i965->intel.device_info))
+            vpp_kernels = gen75_vpp_sharpening_kernels;
+        else if (IS_GEN8(i965->intel.device_info) ||
+                 IS_GEN9(i965->intel.device_info)) // TODO: build the sharpening kernel for GEN9
+            vpp_kernels = gen8_vpp_sharpening_kernels;
+        else
+            return VA_STATUS_ERROR_UNIMPLEMENTED;
+
+        vpp_gpe_ctx->gpe_load_kernels(ctx,
+                                      &vpp_gpe_ctx->gpe_ctx,
+                                      vpp_kernels,
+                                      vpp_gpe_ctx->sub_shader_sum);
+    }
+
+    if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
-                        vpp_gpe_ctx->in_frame_w,
-                        vpp_gpe_ctx->in_frame_h,
-                        VA_RT_FORMAT_YUV420,
-                        1,
-                        &vpp_gpe_ctx->surface_tmp);
-       assert(va_status == VA_STATUS_SUCCESS);
-
-       struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
-       assert(obj_surf);
-
-       if (obj_surf) {
-           i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
-                                       SUBSAMPLE_YUV420);
-           vpp_gpe_ctx->surface_tmp_object = obj_surf;
-       }
-    }
+                                       vpp_gpe_ctx->in_frame_w,
+                                       vpp_gpe_ctx->in_frame_h,
+                                       VA_RT_FORMAT_YUV420,
+                                       1,
+                                       &vpp_gpe_ctx->surface_tmp);
+        assert(va_status == VA_STATUS_SUCCESS);
+
+        struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
+        assert(obj_surf);
+
+        if (obj_surf) {
+            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
+                                        SUBSAMPLE_YUV420);
+            vpp_gpe_ctx->surface_tmp_object = obj_surf;
+        }
+    }
 
     assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
     thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
@@ -684,21 +664,21 @@ vpp_gpe_process_sharpening(VADriverContextP ctx,
     thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
     thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;
 
-    /* Step 1: horizontal blur process */      
+    /* Step 1: horizontal blur process */
     vpp_gpe_ctx->forward_surf_sum = 0;
     vpp_gpe_ctx->backward_surf_sum = 0;
- 
-    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
+
+    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 16;
     vpp_gpe_ctx->thread_param_size = thr_param_size;
     vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
-                                                       *vpp_gpe_ctx->thread_num);
+                                                        * vpp_gpe_ctx->thread_num);
     pos = vpp_gpe_ctx->thread_param;
 
     if (!pos) {
         return VA_STATUS_ERROR_ALLOCATION_FAILED;
     }
 
-    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
+    for (i = 0 ; i < vpp_gpe_ctx->thread_num; i++) {
         thr_param.base.v_pos = 16 * i;
         thr_param.base.h_pos = 0;
         memcpy(pos, &thr_param, thr_param_size);
@@ -709,23 +689,23 @@ vpp_gpe_process_sharpening(VADriverContextP ctx,
     va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
     free(vpp_gpe_ctx->thread_param);
 
-    /* Step 2: vertical blur process */      
+    /* Step 2: vertical blur process */
     vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
     vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
     vpp_gpe_ctx->forward_surf_sum = 0;
     vpp_gpe_ctx->backward_surf_sum = 0;
- 
-    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
+
+    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w / 16;
     vpp_gpe_ctx->thread_param_size = thr_param_size;
     vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
-                                                       *vpp_gpe_ctx->thread_num);
+                                                        * vpp_gpe_ctx->thread_num);
     pos = vpp_gpe_ctx->thread_param;
 
     if (!pos) {
         return VA_STATUS_ERROR_ALLOCATION_FAILED;
     }
 
-    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
+    for (i = 0 ; i < vpp_gpe_ctx->thread_num; i++) {
         thr_param.base.v_pos = 0;
         thr_param.base.h_pos = 16 * i;
         memcpy(pos, &thr_param, thr_param_size);
@@ -736,24 +716,24 @@ vpp_gpe_process_sharpening(VADriverContextP ctx,
     vpp_gpe_process(ctx, vpp_gpe_ctx);
     free(vpp_gpe_ctx->thread_param);
 
-    /* Step 3: apply the blur to original surface */      
+    /* Step 3: apply the blur to original surface */
     vpp_gpe_ctx->surface_input_object[0] = origin_in_obj_surface;
     vpp_gpe_ctx->surface_input_object[1] = vpp_gpe_ctx->surface_tmp_object;
     vpp_gpe_ctx->surface_output_object = origin_out_obj_surface;
     vpp_gpe_ctx->forward_surf_sum = 1;
     vpp_gpe_ctx->backward_surf_sum = 0;
- 
-    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
+
+    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 4;
     vpp_gpe_ctx->thread_param_size = thr_param_size;
     vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
-                                                       *vpp_gpe_ctx->thread_num);
+                                                        * vpp_gpe_ctx->thread_num);
     pos = vpp_gpe_ctx->thread_param;
 
     if (!pos) {
         return VA_STATUS_ERROR_ALLOCATION_FAILED;
     }
 
-    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
+    for (i = 0 ; i < vpp_gpe_ctx->thread_num; i++) {
         thr_param.base.v_pos = 4 * i;
         thr_param.base.h_pos = 0;
         memcpy(pos, &thr_param, thr_param_size);
@@ -771,7 +751,7 @@ error:
 }
 
 VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
-                   struct vpp_gpe_context * vpp_gpe_ctx)
+                                 struct vpp_gpe_context * vpp_gpe_ctx)
 {
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
@@ -783,7 +763,7 @@ VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
     if (pipe->num_filters && !pipe->filters)
         goto error;
 
-    for(i = 0; i < pipe->num_filters; i++){
+    for (i = 0; i < pipe->num_filters; i++) {
         struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);
 
         assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);
@@ -794,43 +774,41 @@ VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
             goto error;
 
         filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;
-        if(filter->type == VAProcFilterSharpening){
-           break;
+        if (filter->type == VAProcFilterSharpening) {
+            break;
         }
     }
-    
+
     assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
     vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;
     vpp_gpe_ctx->forward_surf_sum = 0;
     vpp_gpe_ctx->backward_surf_sum = 0;
- 
-    for(i = 0; i < pipe->num_forward_references; i ++)
-    {
+
+    for (i = 0; i < pipe->num_forward_references; i ++) {
         obj_surface = SURFACE(pipe->forward_references[i]);
 
         assert(obj_surface);
         vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
         vpp_gpe_ctx->forward_surf_sum++;
-    } 
+    }
 
-    for(i = 0; i < pipe->num_backward_references; i ++)
-    {
+    for (i = 0; i < pipe->num_backward_references; i ++) {
         obj_surface = SURFACE(pipe->backward_references[i]);
-        
+
         assert(obj_surface);
         vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i ] = obj_surface;
         vpp_gpe_ctx->backward_surf_sum++;
-    } 
+    }
 
     obj_surface = vpp_gpe_ctx->surface_input_object[0];
     vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
     vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;
 
-    if(filter && filter->type == VAProcFilterSharpening) {
-       va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx); 
+    if (filter && filter->type == VAProcFilterSharpening) {
+        va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
     } else {
-       va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
+        va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
     }
 
     vpp_gpe_ctx->is_first_frame = 0;
@@ -841,9 +819,9 @@ error:
     return VA_STATUS_ERROR_INVALID_PARAMETER;
 }
 
-void 
+void
 vpp_gpe_context_destroy(VADriverContextP ctx,
-                   struct vpp_gpe_context *vpp_gpe_ctx)
+                        struct vpp_gpe_context *vpp_gpe_ctx)
 {
     dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
     vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;
@@ -853,12 +831,12 @@ vpp_gpe_context_destroy(VADriverContextP ctx,
 
     vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);
 
-    if(vpp_gpe_ctx->surface_tmp != VA_INVALID_ID){
+    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
         assert(vpp_gpe_ctx->surface_tmp_object != NULL);
         i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
         vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
         vpp_gpe_ctx->surface_tmp_object = NULL;
-    }   
+    }
 
     if (vpp_gpe_ctx->batch)
         intel_batchbuffer_free(vpp_gpe_ctx->batch);
@@ -871,6 +849,7 @@ vpp_gpe_context_init(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
+    assert(vpp_gpe_ctx);
     struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);
 
     assert(IS_HASWELL(i965->intel.device_info) ||
@@ -887,17 +866,17 @@ vpp_gpe_context_init(VADriverContextP ctx)
     gpe_ctx->vfe_state.gpgpu_mode = 0;
     gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
     gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
-    
+
     if (IS_HASWELL(i965->intel.device_info)) {
         vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init;
         vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy;
         vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels;
         gpe_ctx->surface_state_binding_table.length =
-              (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
+            (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
 
         gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
         gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
-        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
+        gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen6_interface_descriptor_data), 64);
 
     } else if (IS_GEN8(i965->intel.device_info) ||
                IS_GEN9(i965->intel.device_info)) {
@@ -905,11 +884,11 @@ vpp_gpe_context_init(VADriverContextP ctx)
         vpp_gpe_ctx->gpe_context_init = gen8_gpe_context_init;
         vpp_gpe_ctx->gpe_context_destroy = gen8_gpe_context_destroy;
         vpp_gpe_ctx->gpe_load_kernels = gen8_gpe_load_kernels;
         gpe_ctx->surface_state_binding_table.length =
-              (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
-
-        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
-        gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+            (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
+
+        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
+        gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
     }
 
     return vpp_gpe_ctx;
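
Note on the command packing that the gen75/gen8_gpe_process_parameters_fill hunks above preserve: each GPU thread gets one MEDIA_OBJECT command of six header dwords followed by that thread's inline data, and the command's dword-length field excludes the first two dwords, which is where the "size / sizeof(int) + 6 - 2" comes from; the buffer is then terminated with MI_BATCH_BUFFER_END (the Gen8 variant additionally emits a MEDIA_STATE_FLUSH pair after each object's inline data). The C sketch below is a minimal, self-contained illustration of that layout, not driver code: pack_media_objects and the *_SKETCH opcode values are hypothetical placeholders, not the driver's real CMD_MEDIA_OBJECT / MI_BATCH_BUFFER_END definitions, and it assumes the inline-data size is a multiple of 4 bytes, as in the driver.

#include <string.h>

#define CMD_MEDIA_OBJECT_SKETCH    (0x71 << 23) /* placeholder opcode, not the driver's */
#define MI_BATCH_BUFFER_END_SKETCH (0x0A << 23) /* placeholder opcode, not the driver's */

/* Pack one MEDIA_OBJECT per thread into 'cmd', mirroring the loop in
 * gen75/gen8_gpe_process_parameters_fill: 6 header dwords plus 'size'
 * bytes of inline data, length field excluding the first two dwords. */
static unsigned int *
pack_media_objects(unsigned int *cmd,
                   unsigned int sub_shader_index,
                   const unsigned char *thread_param,
                   unsigned int thread_num,
                   unsigned int size) /* inline-data bytes per thread, multiple of 4 */
{
    unsigned int i;

    for (i = 0; i < thread_num; i++) {
        /* total dwords = 6 header + size/4 inline; the field stores total - 2 */
        *cmd++ = CMD_MEDIA_OBJECT_SKETCH | (unsigned int)(size / sizeof(int) + 6 - 2);
        *cmd++ = sub_shader_index; /* selects the interface descriptor, i.e. the kernel */
        *cmd++ = 0;                /* the driver zeroes the remaining header dwords */
        *cmd++ = 0;
        *cmd++ = 0;
        *cmd++ = 0;

        /* per-thread inline data follows the header */
        memcpy(cmd, thread_param + (size_t)size * i, size);
        cmd += size / sizeof(int);
    }

    *cmd++ = 0;                          /* pad so the end marker stays qword aligned */
    *cmd++ = MI_BATCH_BUFFER_END_SKETCH; /* terminate the second-level batch */
    return cmd;
}

This also explains the allocation in gen75/gen8_gpe_process_init above: batch_buf_size = thread_num * (thread_param_size + 6 * sizeof(int)) + 16 reserves exactly one header-plus-inline-data block per thread, with 16 spare bytes for the padding dword and end marker.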