From a4ff1bda484bcb99ec8da888d290b9e85f0b2b9e Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 13 Feb 2012 09:09:04 +0800 Subject: [PATCH] Fill VME batchbuffer by GPU instead of CPU Signed-off-by: Xiang, Haihao --- src/gen6_vme.c | 208 +++++++++++++++++++++++++++++++++++++++++++++++---------- src/gen6_vme.h | 9 ++- 2 files changed, 182 insertions(+), 35 deletions(-) diff --git a/src/gen6_vme.c b/src/gen6_vme.c index 7e0522b..974fa16 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -51,8 +51,9 @@ #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) -#define VME_INTRA_SHADER 0 -#define VME_INTER_SHADER 1 +#define VME_INTRA_SHADER 0 +#define VME_INTER_SHADER 1 +#define VME_BATCHBUFFER 2 #define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */ #define CURBE_TOTAL_DATA_LENGTH (4 * 32) /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */ @@ -66,6 +67,9 @@ static const uint32_t gen6_vme_inter_frame[][4] = { #include "shaders/vme/inter_frame.g6b" }; +static const uint32_t gen6_vme_batchbuffer[][4] = { +}; + static struct i965_kernel gen6_vme_kernels[] = { { "VME Intra Frame", @@ -80,7 +84,14 @@ static struct i965_kernel gen6_vme_kernels[] = { gen6_vme_inter_frame, sizeof(gen6_vme_inter_frame), NULL - } + }, + { + "VME BATCHBUFFER", + VME_BATCHBUFFER, + gen6_vme_batchbuffer, + sizeof(gen6_vme_batchbuffer), + NULL + }, }; static const uint32_t gen7_vme_intra_frame[][4] = { @@ -91,6 +102,10 @@ static const uint32_t gen7_vme_inter_frame[][4] = { #include "shaders/vme/inter_frame.g7b" }; +static const uint32_t gen7_vme_batchbuffer[][4] = { +#include "shaders/vme/batchbuffer.g7b" +}; + static struct i965_kernel gen7_vme_kernels[] = { { "VME Intra Frame", @@ -105,7 +120,14 @@ static struct i965_kernel gen7_vme_kernels[] = { gen7_vme_inter_frame, sizeof(gen7_vme_inter_frame), NULL - } + }, + { + "VME BATCHBUFFER", + VME_BATCHBUFFER, + gen7_vme_batchbuffer, + sizeof(gen7_vme_batchbuffer), + NULL + }, }; static void @@ -544,6 +566,64 @@ gen7_vme_output_buffer_setup(VADriverContextP ctx, return VA_STATUS_SUCCESS; } +static VAStatus +gen7_vme_output_vme_batchbuffer_setup(VADriverContextP ctx, + struct encode_state *encode_state, + int index, + struct intel_encoder_context *encoder_context) + +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct gen7_surface_state *ss; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + int num_entries; + dri_bo *bo; + + vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1; + vme_context->vme_batchbuffer.size_block = 32; /* an OWORD */ + vme_context->vme_batchbuffer.pitch = 16; + bo = dri_bo_alloc(i965->intel.bufmgr, + "VME batchbuffer", + vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block, + 0x1000); + assert(bo); + vme_context->vme_batchbuffer.bo = bo; + + bo = vme_context->surface_state_binding_table.bo; + dri_bo_map(bo, 1); + assert(bo->virtual); + + ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index)); + memset(ss, 0, sizeof(*ss)); + + num_entries = vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block / vme_context->vme_batchbuffer.pitch; + + ss->ss0.render_cache_read_write = 1; + ss->ss0.surface_type = I965_SURFACE_BUFFER; + + ss->ss1.base_addr = vme_context->vme_batchbuffer.bo->offset; + + ss->ss2.width = ((num_entries - 1) & 0x7f); + ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff); + ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f); + + ss->ss3.pitch = vme_context->vme_batchbuffer.pitch - 1; + + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0, + SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1), + vme_context->vme_batchbuffer.bo); + + ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); + dri_bo_unmap(bo); + + return VA_STATUS_SUCCESS; +} + static VAStatus gen7_vme_surface_setup(VADriverContextP ctx, struct encode_state *encode_state, int is_intra, @@ -576,6 +656,7 @@ static VAStatus gen7_vme_surface_setup(VADriverContextP ctx, /* VME output */ gen7_vme_output_buffer_setup(ctx, encode_state, 0, encoder_context); + gen7_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); return VA_STATUS_SUCCESS; } @@ -775,35 +856,80 @@ static void gen6_vme_idrt(VADriverContextP ctx, struct intel_encoder_context *en ADVANCE_BATCH(batch); } -static int gen6_vme_media_object(VADriverContextP ctx, - struct encode_state *encode_state, - int mb_x, int mb_y, - int kernel, - struct intel_encoder_context *encoder_context) +static void +gen6_vme_fill_vme_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + int mb_width, int mb_height, + int kernel, + int transform_8x8_mode_flag, + struct intel_encoder_context *encoder_context) { - struct i965_driver_data *i965 = i965_driver_data(ctx); struct intel_batchbuffer *batch = encoder_context->base.batch; - struct object_surface *obj_surface = SURFACE(encoder_context->input_yuv_surface); - VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - int mb_width = ALIGN(obj_surface->orig_width, 16) / 16; - int len_in_dowrds = 6 + 2; /*6 + n: n is number of inline data*/ + int mb_x, mb_y, i; + int total_mbs = mb_width * mb_height; + int number_mb_cmds = 512; + int starting_mb = 0; + int last_object = 0; + + for (i = 0; i < total_mbs / number_mb_cmds; i++) { + mb_x = starting_mb % mb_width; + mb_y = starting_mb / mb_width; + last_object = (total_mbs - starting_mb) == number_mb_cmds; + starting_mb += number_mb_cmds; + + BEGIN_BATCH(batch, 9); + + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (9 - 2)); + OUT_BATCH(batch, VME_BATCHBUFFER); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + + /*inline data */ + OUT_BATCH(batch, + kernel << 24 | + transform_8x8_mode_flag << 16 | + mb_width); + OUT_BATCH(batch, + number_mb_cmds << 16 | + mb_y << 8 | + mb_x); + OUT_BATCH(batch, last_object); + + ADVANCE_BATCH(batch); + } - BEGIN_BATCH(batch, len_in_dowrds); + if (!last_object) { + number_mb_cmds = total_mbs % number_mb_cmds; + mb_x = starting_mb % mb_width; + mb_y = starting_mb / mb_width; + last_object = 1; + starting_mb += number_mb_cmds; + + BEGIN_BATCH(batch, 9); - OUT_BATCH(batch, CMD_MEDIA_OBJECT | (len_in_dowrds - 2)); - OUT_BATCH(batch, kernel); /*Interface Descriptor Offset*/ - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (9 - 2)); + OUT_BATCH(batch, VME_BATCHBUFFER); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); + OUT_BATCH(batch, 0); - /*inline data */ - OUT_BATCH(batch, mb_width << 16 | mb_y << 8 | mb_x); /*M0.0 Refrence0 X,Y, not used in Intra*/ - OUT_BATCH(batch, pPicParameter->pic_fields.bits.transform_8x8_mode_flag); /* Enabling or disabling 8x8,4x4 Intra mode, - more control flags will added here.*/ - ADVANCE_BATCH(batch); + /*inline data */ + OUT_BATCH(batch, + kernel << 24 | + transform_8x8_mode_flag << 16 | + mb_width); + OUT_BATCH(batch, + number_mb_cmds << 16 | + mb_y << 8 | + mb_x); + OUT_BATCH(batch, last_object); + + ADVANCE_BATCH(batch); + } - return len_in_dowrds * 4; } static void gen6_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) @@ -840,6 +966,9 @@ static void gen6_vme_media_init(VADriverContextP ctx, struct intel_encoder_conte dri_bo_unreference(vme_context->vme_output.bo); vme_context->vme_output.bo = NULL; + dri_bo_unreference(vme_context->vme_batchbuffer.bo); + vme_context->vme_batchbuffer.bo = NULL; + /* VME state */ dri_bo_unreference(vme_context->vme_state.bo); bo = dri_bo_alloc(i965->intel.bufmgr, @@ -859,13 +988,14 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; struct intel_batchbuffer *batch = encoder_context->base.batch; + VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; int width_in_mbs = pSequenceParameter->picture_width_in_mbs; int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - int x, y; intel_batchbuffer_start_atomic(batch, 0x1000); @@ -880,12 +1010,22 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx, gen6_vme_curbe_load(ctx, encoder_context); gen6_vme_idrt(ctx, encoder_context); - /*Step4: Primitive commands*/ - for(y = 0; y < height_in_mbs; y++){ - for(x = 0; x < width_in_mbs; x++){ - gen6_vme_media_object(ctx, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, encoder_context); - } - } + gen6_vme_fill_vme_batchbuffer(ctx, + encode_state, + width_in_mbs, height_in_mbs, + is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, + pPicParameter->pic_fields.bits.transform_8x8_mode_flag, + encoder_context); + + intel_batchbuffer_emit_mi_flush(batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6)); + OUT_RELOC(batch, + vme_context->vme_batchbuffer.bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + ADVANCE_BATCH(batch); intel_batchbuffer_end_atomic(batch); } diff --git a/src/gen6_vme.h b/src/gen6_vme.h index 8a2b6a3..d53697c 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -38,7 +38,7 @@ #define MAX_INTERFACE_DESC_GEN6 32 #define MAX_MEDIA_SURFACES_GEN6 34 -#define GEN6_VME_KERNEL_NUMBER 2 +#define GEN6_VME_KERNEL_NUMBER 3 struct encode_state; struct intel_encoder_context; @@ -76,6 +76,13 @@ struct gen6_vme_context unsigned int pitch; } vme_output; + struct { + dri_bo *bo; + unsigned int num_blocks; + unsigned int size_block; + unsigned int pitch; + } vme_batchbuffer; + struct i965_kernel vme_kernels[GEN6_VME_KERNEL_NUMBER]; }; -- 2.11.0