2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao Yakui <yakui.zhao@intel.com>
26 * Xiang Haihao <haihao.xiang@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
44 #ifdef SURFACE_STATE_PADDED_SIZE
45 #undef SURFACE_STATE_PADDED_SIZE
48 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
49 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
50 #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
52 #define VME_INTRA_SHADER 0
53 #define VME_INTER_SHADER 1
54 #define VME_BINTER_SHADER 2
56 #define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */
57 #define CURBE_TOTAL_DATA_LENGTH (4 * 32) /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
58 #define CURBE_URB_ENTRY_LENGTH 4 /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
60 #define VME_MSG_LENGTH 32
/* Pre-compiled Gen9 VME shader binaries for AVC (H.264) encoding, each
 * included as raw 128-bit instruction words, plus the kernel descriptor
 * table that maps them to the VME_*_SHADER indices.
 * NOTE(review): this extract is lossy — closing braces, kernel-name fields
 * and several initializer lines from the original file are missing here;
 * do not hand-edit without the complete source. */
62 static const uint32_t gen9_vme_intra_frame[][4] = {
63 #include "shaders/vme/intra_frame_gen9.g9b"
66 static const uint32_t gen9_vme_inter_frame[][4] = {
67 #include "shaders/vme/inter_frame_gen9.g9b"
70 static const uint32_t gen9_vme_inter_bframe[][4] = {
71 #include "shaders/vme/inter_bframe_gen9.g9b"
74 static struct i965_kernel gen9_vme_kernels[] = {
77 VME_INTRA_SHADER, /*index*/
79 sizeof(gen9_vme_intra_frame),
86 sizeof(gen9_vme_inter_frame),
92 gen9_vme_inter_bframe,
93 sizeof(gen9_vme_inter_bframe),
/* Gen9 VME shader binaries and kernel table for MPEG-2 encoding.  Intra
 * reuses the AVC intra_frame binary; inter uses an MPEG-2-specific shader.
 * NOTE(review): lossy extract — closing braces and some initializer
 * fields are missing from this view. */
98 static const uint32_t gen9_vme_mpeg2_intra_frame[][4] = {
99 #include "shaders/vme/intra_frame_gen9.g9b"
102 static const uint32_t gen9_vme_mpeg2_inter_frame[][4] = {
103 #include "shaders/vme/mpeg2_inter_gen9.g9b"
106 static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
109 VME_INTRA_SHADER, /*index*/
110 gen9_vme_mpeg2_intra_frame,
111 sizeof(gen9_vme_mpeg2_intra_frame),
117 gen9_vme_mpeg2_inter_frame,
118 sizeof(gen9_vme_mpeg2_inter_frame),
/* Gen9 VME shader binaries and kernel table for VP8 encoding (dedicated
 * vp8_intra/vp8_inter shaders).
 * NOTE(review): lossy extract — closing braces and some initializer
 * fields are missing from this view. */
123 static const uint32_t gen9_vme_vp8_intra_frame[][4] = {
124 #include "shaders/vme/vp8_intra_frame_gen9.g9b"
127 static const uint32_t gen9_vme_vp8_inter_frame[][4] = {
128 #include "shaders/vme/vp8_inter_frame_gen9.g9b"
131 static struct i965_kernel gen9_vme_vp8_kernels[] = {
134 VME_INTRA_SHADER, /*index*/
135 gen9_vme_vp8_intra_frame,
136 sizeof(gen9_vme_vp8_intra_frame),
142 gen9_vme_vp8_inter_frame,
143 sizeof(gen9_vme_vp8_inter_frame),
/* Gen9 VME shader binaries and kernel table for HEVC encoding.  The HEVC
 * path reuses the same AVC VME binaries (intra/inter/bframe) — motion
 * estimation is shared; only the downstream packing differs.
 * NOTE(review): lossy extract — closing braces and some initializer
 * fields are missing from this view. */
150 static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
151 #include "shaders/vme/intra_frame_gen9.g9b"
154 static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
155 #include "shaders/vme/inter_frame_gen9.g9b"
158 static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
159 #include "shaders/vme/inter_bframe_gen9.g9b"
162 static struct i965_kernel gen9_vme_hevc_kernels[] = {
165 VME_INTRA_SHADER, /*index*/
166 gen9_vme_hevc_intra_frame,
167 sizeof(gen9_vme_hevc_intra_frame),
173 gen9_vme_hevc_inter_frame,
174 sizeof(gen9_vme_hevc_inter_frame),
180 gen9_vme_hevc_inter_bframe,
181 sizeof(gen9_vme_hevc_inter_bframe),
/* Bind obj_surface as a VME SURFACE2 source surface at the given binding
 * table / surface state slot (via the context's vme_surface2_setup hook).
 * NOTE(review): lossy extract — the return type, the `index` parameter
 * line, the opening/closing braces and the obj_surface argument line are
 * missing from this view. */
185 /* only used for VME source surface state */
187 gen9_vme_source_surface_state(VADriverContextP ctx,
189 struct object_surface *obj_surface,
190 struct intel_encoder_context *encoder_context)
192 struct gen6_vme_context *vme_context = encoder_context->vme_context;
194 vme_context->vme_surface2_setup(ctx,
195 &vme_context->gpe_context,
197 BINDING_TABLE_OFFSET(index),
198 SURFACE_STATE_OFFSET(index));
/* Bind obj_surface as a media read/write surface (luma plane) at the given
 * slot via the vme_media_rw_surface_setup hook.
 * NOTE(review): lossy extract — return type, `index` parameter, braces and
 * the obj_surface argument line are missing from this view. */
202 gen9_vme_media_source_surface_state(VADriverContextP ctx,
204 struct object_surface *obj_surface,
205 struct intel_encoder_context *encoder_context)
207 struct gen6_vme_context *vme_context = encoder_context->vme_context;
209 vme_context->vme_media_rw_surface_setup(ctx,
210 &vme_context->gpe_context,
212 BINDING_TABLE_OFFSET(index),
213 SURFACE_STATE_OFFSET(index));
/* Bind the chroma plane of obj_surface as a media surface at the given
 * slot via the vme_media_chroma_surface_setup hook.
 * NOTE(review): lossy extract — return type, `index` parameter, braces and
 * the obj_surface argument line are missing from this view. */
217 gen9_vme_media_chroma_source_surface_state(VADriverContextP ctx,
219 struct object_surface *obj_surface,
220 struct intel_encoder_context *encoder_context)
222 struct gen6_vme_context *vme_context = encoder_context->vme_context;
224 vme_context->vme_media_chroma_surface_setup(ctx,
225 &vme_context->gpe_context,
227 BINDING_TABLE_OFFSET(index),
228 SURFACE_STATE_OFFSET(index));
/* Allocate the per-MB VME output buffer (one block per macroblock) and
 * bind it at the given surface slot.  The block size depends on is_intra:
 * intra output is 2x INTRA_VME_OUTPUT_IN_BYTES; inter output is 24x to
 * hold IME/FBR MV and reference data (see the 16 * 24 breakdown below).
 * NOTE(review): lossy extract — several parameter lines, the is_intra
 * branch keywords and dri_bo_alloc arguments are missing from this view. */
232 gen9_vme_output_buffer_setup(VADriverContextP ctx,
233 struct encode_state *encode_state,
235 struct intel_encoder_context *encoder_context,
241 struct i965_driver_data *i965 = i965_driver_data(ctx);
242 struct gen6_vme_context *vme_context = encoder_context->vme_context;
244 vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
245 vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
248 vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
250 vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
252 * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
253 * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
254 * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
257 vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
259 vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
261 assert(vme_context->vme_output.bo);
262 vme_context->vme_buffer_suface_setup(ctx,
263 &vme_context->gpe_context,
264 &vme_context->vme_output,
265 BINDING_TABLE_OFFSET(index),
266 SURFACE_STATE_OFFSET(index));
/* AVC wrapper: derive is_intra and frame dimensions (in MBs) from the H.264
 * sequence/slice parameters, then delegate to gen9_vme_output_buffer_setup.
 * NOTE(review): lossy extract — return type, `index` parameter and braces
 * are missing from this view. */
270 gen9_vme_avc_output_buffer_setup(VADriverContextP ctx,
271 struct encode_state *encode_state,
273 struct intel_encoder_context *encoder_context)
275 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
276 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
277 int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
278 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
279 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
281 gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
/* Allocate the second-level batch buffer that will hold one 64-byte
 * MEDIA_OBJECT command per macroblock (+1 for the batch-end), and bind it
 * at the given surface slot.
 * NOTE(review): lossy extract — return type, `index`/width/height
 * parameter lines, braces and dri_bo_alloc arguments are missing here. */
286 gen9_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
287 struct encode_state *encode_state,
289 struct intel_encoder_context *encoder_context,
293 struct i965_driver_data *i965 = i965_driver_data(ctx);
294 struct gen6_vme_context *vme_context = encoder_context->vme_context;
296 vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
297 vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
298 vme_context->vme_batchbuffer.pitch = 16;
299 vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
301 vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
303 vme_context->vme_buffer_suface_setup(ctx,
304 &vme_context->gpe_context,
305 &vme_context->vme_batchbuffer,
306 BINDING_TABLE_OFFSET(index),
307 SURFACE_STATE_OFFSET(index));
/* AVC wrapper: derive MB dimensions from the H.264 sequence parameters and
 * delegate to gen9_vme_output_vme_batchbuffer_setup.
 * NOTE(review): lossy extract — return type, `index` parameter and braces
 * are missing from this view. */
311 gen9_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
312 struct encode_state *encode_state,
314 struct intel_encoder_context *encoder_context)
316 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
317 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
318 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
320 gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
/* Bind all surfaces for an AVC VME pass: current input YUV (slots 0/4/6),
 * reference list 0 (and list 1 for B slices) via
 * intel_avc_vme_reference_state, the VME output buffer (slot 3) and the
 * second-level batch buffer (slot 5).
 * NOTE(review): lossy extract — return type, `is_intra` parameter, braces
 * and the non-intra guard around the reference setup are missing here. */
325 gen9_vme_surface_setup(VADriverContextP ctx,
326 struct encode_state *encode_state,
328 struct intel_encoder_context *encoder_context)
330 struct object_surface *obj_surface;
332 /*Setup surfaces state*/
333 /* current picture for encoding */
334 obj_surface = encode_state->input_yuv_object;
335 gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
336 gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
337 gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
340 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
343 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
344 assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
346 intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
348 if (slice_type == SLICE_TYPE_B)
349 intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
353 gen9_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
354 gen9_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
356 return VA_STATUS_SUCCESS;
/* Fill one gen8_interface_descriptor_data (32 bytes each) per loaded VME
 * kernel in the dynamic-state buffer: kernel start pointer, sampler state
 * (unused), binding table pointer and CURBE read length.
 * NOTE(review): lossy extract — the opening brace, loop-local declarations,
 * the desc++ advance, bo map/unmap calls and relocation emission are
 * missing from this view. */
359 static VAStatus gen9_vme_interface_setup(VADriverContextP ctx,
360 struct encode_state *encode_state,
361 struct intel_encoder_context *encoder_context)
363 struct gen6_vme_context *vme_context = encoder_context->vme_context;
364 struct gen8_interface_descriptor_data *desc;
367 unsigned char *desc_ptr;
369 bo = vme_context->gpe_context.dynamic_state.bo;
372 desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
374 desc = (struct gen8_interface_descriptor_data *)desc_ptr;
376 for (i = 0; i < vme_context->vme_kernel_sum; i++) {
377 struct i965_kernel *kernel;
378 kernel = &vme_context->gpe_context.kernels[i];
379 assert(sizeof(*desc) == 32);
380 /*Setup the descritor table*/
381 memset(desc, 0, sizeof(*desc));
382 desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
383 desc->desc3.sampler_count = 0; /* FIXME: */
384 desc->desc3.sampler_state_pointer = 0;
385 desc->desc4.binding_table_entry_count = 1; /* FIXME: */
386 desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
387 desc->desc5.constant_urb_entry_read_offset = 0;
388 desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
395 return VA_STATUS_SUCCESS;
/* Copy the per-codec VME state message (MV count limit in dword 31, cost
 * tables) into the CURBE area of the dynamic-state buffer.  The MV number
 * is clamped by codec level: H.264 levels >= 3.0/3.1, MPEG-2, and HEVC
 * levels expressed as level_idc*3 (HEVC reuses the AVC-style setting).
 * NOTE(review): lossy extract — the mv_num declaration/default, the
 * assignments inside each level branch and several braces are missing. */
398 static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
399 struct encode_state *encode_state,
400 struct intel_encoder_context *encoder_context)
402 struct gen6_vme_context *vme_context = encoder_context->vme_context;
403 unsigned char *constant_buffer;
404 unsigned int *vme_state_message;
407 vme_state_message = (unsigned int *)vme_context->vme_state_message;
409 if (encoder_context->codec == CODEC_H264 ||
410 encoder_context->codec == CODEC_H264_MVC) {
411 if (vme_context->h264_level >= 30) {
414 if (vme_context->h264_level >= 31)
417 } else if (encoder_context->codec == CODEC_MPEG2) {
419 }else if (encoder_context->codec == CODEC_HEVC) {
420 if (vme_context->hevc_level >= 30*3) {
423 if (vme_context->hevc_level >= 31*3)
425 }/* use the avc level setting */
428 vme_state_message[31] = mv_num;
430 dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
431 assert(vme_context->gpe_context.dynamic_state.bo->virtual);
432 constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
433 vme_context->gpe_context.curbe_offset;
435 /* VME MV/Mb cost table is passed by using const buffer */
436 /* Now it uses the fixed search path. So it is constructed directly
439 memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
441 dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
443 return VA_STATUS_SUCCESS;
446 #define MB_SCOREBOARD_A (1 << 0)
447 #define MB_SCOREBOARD_B (1 << 1)
448 #define MB_SCOREBOARD_C (1 << 2)
/* Returns nonzero (out of bounds) when (x_index, y_index) lies outside the
 * frame or outside the slice's MB range [first_mb, first_mb + num_mb].
 * NOTE(review): lossy extract — the opening brace, the mb_index
 * declaration and the return statements are missing from this view. */
450 /* check whether the mb of (x_index, y_index) is out of bound */
451 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
454 if (x_index < 0 || x_index >= mb_width)
456 if (y_index < 0 || y_index >= mb_height)
459 mb_index = y_index * mb_width + x_index;
460 if (mb_index < first_mb || mb_index > (first_mb + num_mb))
/* Workaround variant (SW walker): emit MEDIA_OBJECT commands per MB into
 * the second-level batch buffer in a diagonal/wavefront order with the HW
 * scoreboard enabled (USE_SCOREBOARD), encoding A/B/C neighbour
 * dependencies in score_dep and intra-prediction availability flags in
 * mb_intra_ub.  The two outer loops cover the left region (x < mb_width-2)
 * and the right remainder, respectively.
 * NOTE(review): lossy extract — return type, the `kernel` parameter,
 * inner-loop initialisation (x_inner/y_inner/mb_row), advance/step
 * statements and many closing braces are missing from this view. */
466 gen9wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
467 struct encode_state *encode_state,
468 int mb_width, int mb_height,
470 int transform_8x8_mode_flag,
471 struct intel_encoder_context *encoder_context)
473 struct gen6_vme_context *vme_context = encoder_context->vme_context;
476 unsigned int *command_ptr;
478 #define USE_SCOREBOARD (1 << 21)
480 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
481 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
483 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
484 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
485 int first_mb = pSliceParameter->macroblock_address;
486 int num_mb = pSliceParameter->num_macroblocks;
487 unsigned int mb_intra_ub, score_dep;
488 int x_outer, y_outer, x_inner, y_inner;
491 x_outer = first_mb % mb_width;
492 y_outer = first_mb / mb_width;
495 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
498 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
502 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
503 score_dep |= MB_SCOREBOARD_A;
505 if (y_inner != mb_row) {
506 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
507 score_dep |= MB_SCOREBOARD_B;
509 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
510 if (x_inner != (mb_width -1)) {
511 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
512 score_dep |= MB_SCOREBOARD_C;
516 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
517 *command_ptr++ = kernel;
518 *command_ptr++ = USE_SCOREBOARD;
521 /* the (X, Y) term of scoreboard */
522 *command_ptr++ = ((y_inner << 16) | x_inner);
523 *command_ptr++ = score_dep;
525 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
526 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
527 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
536 xtemp_outer = mb_width - 2;
539 x_outer = xtemp_outer;
540 y_outer = first_mb / mb_width;
541 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
544 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
548 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
549 score_dep |= MB_SCOREBOARD_A;
551 if (y_inner != mb_row) {
552 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
553 score_dep |= MB_SCOREBOARD_B;
555 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
557 if (x_inner != (mb_width -1)) {
558 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
559 score_dep |= MB_SCOREBOARD_C;
563 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
564 *command_ptr++ = kernel;
565 *command_ptr++ = USE_SCOREBOARD;
568 /* the (X, Y) term of scoreboard */
569 *command_ptr++ = ((y_inner << 16) | x_inner);
570 *command_ptr++ = score_dep;
572 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
573 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
575 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
581 if (x_outer >= mb_width) {
583 x_outer = xtemp_outer;
588 *command_ptr++ = MI_BATCH_BUFFER_END;
591 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Plain (non-scoreboard) walker: emit one MEDIA_OBJECT per macroblock in
 * raster order for each H.264 slice, computing mb_intra_ub availability
 * flags from the MB position and masking them at slice starts.  Ends with
 * MI_BATCH_BUFFER_END.
 * NOTE(review): lossy extract — return type, the `kernel` parameter, the
 * mb_intra_ub initialisation, slice-boundary condition lines, the loop
 * increment and several braces are missing from this view. */
595 gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx,
596 struct encode_state *encode_state,
597 int mb_width, int mb_height,
599 int transform_8x8_mode_flag,
600 struct intel_encoder_context *encoder_context)
602 struct gen6_vme_context *vme_context = encoder_context->vme_context;
603 int mb_x = 0, mb_y = 0;
605 unsigned int *command_ptr;
607 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
608 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
610 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
611 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
612 int slice_mb_begin = pSliceParameter->macroblock_address;
613 int slice_mb_number = pSliceParameter->num_macroblocks;
614 unsigned int mb_intra_ub;
615 int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
616 for (i = 0; i < slice_mb_number; ) {
617 int mb_count = i + slice_mb_begin;
618 mb_x = mb_count % mb_width;
619 mb_y = mb_count / mb_width;
622 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
625 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
627 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
628 if (mb_x != (mb_width -1))
629 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
633 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
634 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
635 if ((i == (mb_width - 1)) && slice_mb_x) {
636 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
640 if ((i == mb_width) && slice_mb_x) {
641 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
643 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
644 *command_ptr++ = kernel;
651 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
652 *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
654 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
660 *command_ptr++ = MI_BATCH_BUFFER_END;
663 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Reset per-frame VME state: (re)initialise the GPE context and drop the
 * previous output, second-level batch and VME state buffer objects so
 * they are reallocated for the new frame.
 * NOTE(review): lossy extract — braces and any intervening comments are
 * missing from this view. */
666 static void gen9_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
668 struct gen6_vme_context *vme_context = encoder_context->vme_context;
670 gen8_gpe_context_init(ctx, &vme_context->gpe_context);
672 /* VME output buffer */
673 dri_bo_unreference(vme_context->vme_output.bo);
674 vme_context->vme_output.bo = NULL;
676 dri_bo_unreference(vme_context->vme_batchbuffer.bo);
677 vme_context->vme_batchbuffer.bo = NULL;
680 dri_bo_unreference(vme_context->vme_state.bo);
681 vme_context->vme_state.bo = NULL;
/* Program the AVC VME media pipeline: choose the kernel from the slice
 * type (intra / inter / b-inter), decide whether the HW-scoreboard walker
 * can be used (disabled for low quality and for slices not starting at a
 * row boundary), fill the second-level batch with the matching routine,
 * then chain to it from the primary batch via MI_BATCH_BUFFER_START.
 * NOTE(review): lossy extract — several declarations (s, kernel_shader),
 * the low-quality and multi-ref conditions, OUT_RELOC and some braces are
 * missing from this view. */
684 static void gen9_vme_pipeline_programing(VADriverContextP ctx,
685 struct encode_state *encode_state,
686 struct intel_encoder_context *encoder_context)
688 struct gen6_vme_context *vme_context = encoder_context->vme_context;
689 struct intel_batchbuffer *batch = encoder_context->base.batch;
690 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
691 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
692 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
693 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
694 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
696 bool allow_hwscore = true;
698 unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
701 allow_hwscore = false;
703 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
704 pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
705 if ((pSliceParameter->macroblock_address % width_in_mbs)) {
706 allow_hwscore = false;
712 if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
713 (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
714 kernel_shader = VME_INTRA_SHADER;
715 } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
716 (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
717 kernel_shader = VME_INTER_SHADER;
719 kernel_shader = VME_BINTER_SHADER;
721 kernel_shader = VME_INTER_SHADER;
724 gen9wa_vme_walker_fill_vme_batchbuffer(ctx,
726 width_in_mbs, height_in_mbs,
728 pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
731 gen9_vme_fill_vme_batchbuffer(ctx,
733 width_in_mbs, height_in_mbs,
735 pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
738 intel_batchbuffer_start_atomic(batch, 0x1000);
739 gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
740 BEGIN_BATCH(batch, 3);
741 OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
743 vme_context->vme_batchbuffer.bo,
744 I915_GEM_DOMAIN_COMMAND, 0,
747 ADVANCE_BATCH(batch);
749 gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
751 intel_batchbuffer_end_atomic(batch);
/* AVC per-frame preparation: refresh the cached h264_level when the SPS
 * level changes, update the MB/MV cost tables, set up surfaces, interface
 * descriptors and constants, then program the media pipeline.
 * NOTE(review): lossy extract — the opening brace, the body of the level
 * update and the final `return vaStatus;` are missing from this view. */
754 static VAStatus gen9_vme_prepare(VADriverContextP ctx,
755 struct encode_state *encode_state,
756 struct intel_encoder_context *encoder_context)
758 VAStatus vaStatus = VA_STATUS_SUCCESS;
759 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
760 int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
761 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
762 struct gen6_vme_context *vme_context = encoder_context->vme_context;
764 if (!vme_context->h264_level ||
765 (vme_context->h264_level != pSequenceParameter->level_idc)) {
766 vme_context->h264_level = pSequenceParameter->level_idc;
769 intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
771 /*Setup all the memory object*/
772 gen9_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
773 gen9_vme_interface_setup(ctx, encode_state, encoder_context);
774 //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
775 gen9_vme_constant_setup(ctx, encode_state, encoder_context);
777 /*Programing media pipeline*/
778 gen9_vme_pipeline_programing(ctx, encode_state, encoder_context);
/* Submit the prepared batch buffer to the GPU (flush) and report success.
 * NOTE(review): lossy extract — braces are missing from this view. */
783 static VAStatus gen9_vme_run(VADriverContextP ctx,
784 struct encode_state *encode_state,
785 struct intel_encoder_context *encoder_context)
787 struct intel_batchbuffer *batch = encoder_context->base.batch;
789 intel_batchbuffer_flush(batch);
791 return VA_STATUS_SUCCESS;
/* No-op teardown hook for the VME stage; always succeeds.
 * NOTE(review): lossy extract — braces are missing from this view. */
794 static VAStatus gen9_vme_stop(VADriverContextP ctx,
795 struct encode_state *encode_state,
796 struct intel_encoder_context *encoder_context)
798 return VA_STATUS_SUCCESS;
/* Top-level AVC VME entry point: init -> prepare -> run -> stop.
 * NOTE(review): lossy extract — the return type, a `profile` parameter
 * line and braces are missing from this view. */
802 gen9_vme_pipeline(VADriverContextP ctx,
804 struct encode_state *encode_state,
805 struct intel_encoder_context *encoder_context)
807 gen9_vme_media_init(ctx, encoder_context);
808 gen9_vme_prepare(ctx, encode_state, encoder_context);
809 gen9_vme_run(ctx, encode_state, encoder_context);
810 gen9_vme_stop(ctx, encode_state, encoder_context);
812 return VA_STATUS_SUCCESS;
/* MPEG-2 wrapper: compute MB dimensions from the pixel-size sequence
 * parameters (16-aligned) and delegate to gen9_vme_output_buffer_setup.
 * NOTE(review): lossy extract — return type, `index`/`is_intra` parameter
 * lines and braces are missing from this view. */
816 gen9_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
817 struct encode_state *encode_state,
820 struct intel_encoder_context *encoder_context)
823 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
824 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
825 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
827 gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
/* MPEG-2 wrapper: compute MB dimensions (16-aligned pixel sizes) and
 * delegate to gen9_vme_output_vme_batchbuffer_setup.
 * NOTE(review): lossy extract — return type, `index` parameter and braces
 * are missing from this view. */
831 gen9_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
832 struct encode_state *encode_state,
834 struct intel_encoder_context *encoder_context)
837 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
838 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
839 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
841 gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
/* Bind all surfaces for an MPEG-2 VME pass: input YUV (slots 0/4/6), the
 * forward reference in slot 1 and backward reference in slot 2 (when
 * present), plus the VME output (slot 3) and second-level batch (slot 5).
 * NOTE(review): lossy extract — return type, `is_intra` parameter, the
 * guards around the reference blocks and several braces are missing. */
845 gen9_vme_mpeg2_surface_setup(VADriverContextP ctx,
846 struct encode_state *encode_state,
848 struct intel_encoder_context *encoder_context)
850 struct object_surface *obj_surface;
852 /*Setup surfaces state*/
853 /* current picture for encoding */
854 obj_surface = encode_state->input_yuv_object;
855 gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
856 gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
857 gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
861 obj_surface = encode_state->reference_objects[0];
863 if (obj_surface->bo != NULL)
864 gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
867 obj_surface = encode_state->reference_objects[1];
869 if (obj_surface && obj_surface->bo != NULL)
870 gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
874 gen9_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
875 gen9_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
877 return VA_STATUS_SUCCESS;
/* MPEG-2 workaround walker: same wavefront/scoreboard MEDIA_OBJECT
 * emission as the AVC gen9wa walker, but treats the whole frame as one
 * slice (num_mb = mb_width * mb_height) and uses MPEG2_SCOREBOARD; no
 * transform_8x8 flag in the inline data.
 * NOTE(review): lossy extract — return type, the `kernel` parameter,
 * first_mb/x_inner/y_inner/mb_row initialisation, step statements and many
 * braces are missing from this view. */
881 gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
882 struct encode_state *encode_state,
883 int mb_width, int mb_height,
885 struct intel_encoder_context *encoder_context)
887 struct gen6_vme_context *vme_context = encoder_context->vme_context;
888 unsigned int *command_ptr;
890 #define MPEG2_SCOREBOARD (1 << 21)
892 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
893 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
896 unsigned int mb_intra_ub, score_dep;
897 int x_outer, y_outer, x_inner, y_inner;
900 int num_mb = mb_width * mb_height;
905 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
908 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
912 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
913 score_dep |= MB_SCOREBOARD_A;
916 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
917 score_dep |= MB_SCOREBOARD_B;
920 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
922 if (x_inner != (mb_width -1)) {
923 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
924 score_dep |= MB_SCOREBOARD_C;
928 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
929 *command_ptr++ = kernel;
930 *command_ptr++ = MPEG2_SCOREBOARD;
933 /* the (X, Y) term of scoreboard */
934 *command_ptr++ = ((y_inner << 16) | x_inner);
935 *command_ptr++ = score_dep;
937 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
938 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
939 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
948 xtemp_outer = mb_width - 2;
951 x_outer = xtemp_outer;
953 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
956 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
960 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
961 score_dep |= MB_SCOREBOARD_A;
964 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
965 score_dep |= MB_SCOREBOARD_B;
968 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
970 if (x_inner != (mb_width -1)) {
971 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
972 score_dep |= MB_SCOREBOARD_C;
976 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
977 *command_ptr++ = kernel;
978 *command_ptr++ = MPEG2_SCOREBOARD;
981 /* the (X, Y) term of scoreboard */
982 *command_ptr++ = ((y_inner << 16) | x_inner);
983 *command_ptr++ = score_dep;
985 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
986 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
988 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
994 if (x_outer >= mb_width) {
996 x_outer = xtemp_outer;
1001 *command_ptr++ = MI_BATCH_BUFFER_END;
1004 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Plain MPEG-2 walker: emit one MEDIA_OBJECT per macroblock in raster
 * order, iterating over each slice parameter buffer and each element
 * within it; ends with MI_BATCH_BUFFER_END.
 * NOTE(review): lossy extract — return type, the `kernel` parameter,
 * mb_intra_ub initialisation, neighbour-test conditions, the slice_param
 * advance and loop increments are missing from this view. */
1009 gen9_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
1010 struct encode_state *encode_state,
1011 int mb_width, int mb_height,
1013 int transform_8x8_mode_flag,
1014 struct intel_encoder_context *encoder_context)
1016 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1017 int mb_x = 0, mb_y = 0;
1019 unsigned int *command_ptr;
1022 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1023 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1025 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1026 VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1028 for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1029 int slice_mb_begin = slice_param->macroblock_address;
1030 int slice_mb_number = slice_param->num_macroblocks;
1031 unsigned int mb_intra_ub;
1033 for (i = 0; i < slice_mb_number;) {
1034 int mb_count = i + slice_mb_begin;
1036 mb_x = mb_count % mb_width;
1037 mb_y = mb_count / mb_width;
1041 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1045 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1048 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1050 if (mb_x != (mb_width -1))
1051 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1054 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1055 *command_ptr++ = kernel;
1062 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
1063 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1065 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1074 *command_ptr++ = MI_BATCH_BUFFER_END;
1077 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Program the MPEG-2 VME pipeline: pick intra vs inter kernel from the
 * picture type, allow the scoreboard walker only when every slice starts
 * on a row boundary and the picture is not intra, fill the second-level
 * batch accordingly, then chain to it from the primary batch.
 * NOTE(review): lossy extract — return type, the `is_intra` parameter,
 * s/j/kernel_shader declarations, the slice_param advance, OUT_RELOC and
 * several braces are missing from this view. */
1081 gen9_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
1082 struct encode_state *encode_state,
1084 struct intel_encoder_context *encoder_context)
1086 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1087 struct intel_batchbuffer *batch = encoder_context->base.batch;
1088 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1089 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1090 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1091 bool allow_hwscore = true;
1094 VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1096 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1098 VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1100 for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1101 if (slice_param->macroblock_address % width_in_mbs) {
1102 allow_hwscore = false;
1108 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1109 if (pic_param->picture_type == VAEncPictureTypeIntra) {
1110 allow_hwscore = false;
1111 kernel_shader = VME_INTRA_SHADER;
1113 kernel_shader = VME_INTER_SHADER;
1117 gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1119 width_in_mbs, height_in_mbs,
1123 gen9_vme_mpeg2_fill_vme_batchbuffer(ctx,
1125 width_in_mbs, height_in_mbs,
1126 is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
1130 intel_batchbuffer_start_atomic(batch, 0x1000);
1131 gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1132 BEGIN_BATCH(batch, 4);
1133 OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1135 vme_context->vme_batchbuffer.bo,
1136 I915_GEM_DOMAIN_COMMAND, 0,
1138 OUT_BATCH(batch, 0);
1139 OUT_BATCH(batch, 0);
1140 ADVANCE_BATCH(batch);
1142 gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1144 intel_batchbuffer_end_atomic(batch);
/* MPEG-2 per-frame preparation: refresh the cached mpeg2_level from the
 * sequence extension when it changes, then set up surfaces, interface
 * descriptors, the MPEG-2 VME state message and constants, and program
 * the pipeline with the slice's intra flag.
 * NOTE(review): lossy extract — the return type, the level-change branch
 * body and the final `return vaStatus;` are missing from this view. */
1148 gen9_vme_mpeg2_prepare(VADriverContextP ctx,
1149 struct encode_state *encode_state,
1150 struct intel_encoder_context *encoder_context)
1152 VAStatus vaStatus = VA_STATUS_SUCCESS;
1153 VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1154 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1155 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1157 if ((!vme_context->mpeg2_level) ||
1158 (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
1159 vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
1162 /*Setup all the memory object*/
1163 gen9_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1164 gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1165 //gen9_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1166 intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
1167 gen9_vme_constant_setup(ctx, encode_state, encoder_context);
1169 /*Programing media pipeline*/
1170 gen9_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
/* Top-level MPEG-2 VME pipeline entry point: initialize media state,
 * prepare the per-frame state, submit the VME kernels and wait for
 * completion. */
1176 gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
1178 struct encode_state *encode_state,
1179 struct intel_encoder_context *encoder_context)
1181 gen9_vme_media_init(ctx, encoder_context);
1182 gen9_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1183 gen9_vme_run(ctx, encode_state, encoder_context);
1184 gen9_vme_stop(ctx, encode_state, encoder_context);
1186 return VA_STATUS_SUCCESS;
/* Bind the VP8 VME output buffer at the given binding-table index.
 * Frame dimensions come from the VP8 sequence parameters and are
 * converted to 16x16 macroblock units before delegating to the common
 * gen9_vme_output_buffer_setup helper.
 * NOTE(review): the index/is_intra parameter lines are elided here. */
1190 gen9_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1191 struct encode_state *encode_state,
1194 struct intel_encoder_context *encoder_context)
1196 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1197 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1198 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1200 gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
/* Bind the VP8 second-level (VME) batch buffer at the given binding-table
 * index, sized from the frame dimensions in 16x16 macroblock units. */
1204 gen9_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1205 struct encode_state *encode_state,
1207 struct intel_encoder_context *encoder_context)
1209 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1210 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1211 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1213 gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
/* Populate the binding table for the VP8 VME kernels:
 *   0    - current picture (VME source view)
 *   4, 6 - current picture luma / chroma media views
 *   1, 2 - first / second reference pictures (when present)
 *   3    - VME output buffer
 *   5    - second-level batch buffer */
1217 gen9_vme_vp8_surface_setup(VADriverContextP ctx,
1218 struct encode_state *encode_state,
1220 struct intel_encoder_context *encoder_context)
1222 struct object_surface *obj_surface;
1224 /* Set up surfaces state */
1225 /* current picture for encoding */
1226 obj_surface = encode_state->input_yuv_object;
1227 gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1228 gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1229 gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
/* First reference picture (binding slot 1).
 * NOTE(review): unlike slot 2 below, obj_surface is dereferenced without a
 * NULL check here — confirm reference_objects[0] is guaranteed non-NULL on
 * this path (a guard may exist in the elided lines). */
1233 obj_surface = encode_state->reference_objects[0];
1235 if (obj_surface->bo != NULL)
1236 gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
/* Second reference picture (binding slot 2), optional. */
1239 obj_surface = encode_state->reference_objects[1];
1241 if (obj_surface && obj_surface->bo != NULL)
1242 gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1246 gen9_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1247 gen9_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1249 return VA_STATUS_SUCCESS;
/* Program the VP8 VME media pipeline: fill the second-level batch buffer
 * with per-macroblock MEDIA_OBJECT commands, then emit a chained
 * MI_BATCH_BUFFER_START from the primary batch.
 * NOTE(review): intermediate source lines are elided in this listing. */
1253 gen9_vme_vp8_pipeline_programing(VADriverContextP ctx,
1254 struct encode_state *encode_state,
1256 struct intel_encoder_context *encoder_context)
1258 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1259 struct intel_batchbuffer *batch = encoder_context->base.batch;
1260 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1261 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1262 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1263 int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);
/* The MPEG-2 scoreboard-walker fill routine is reused for VP8, since both
 * operate on the same 16x16 macroblock grid. */
1265 gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1267 width_in_mbs, height_in_mbs,
1271 intel_batchbuffer_start_atomic(batch, 0x1000);
1272 gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
/* Chain into the second-level VME batch buffer. */
1273 BEGIN_BATCH(batch, 4);
1274 OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1276 vme_context->vme_batchbuffer.bo,
1277 I915_GEM_DOMAIN_COMMAND, 0,
1279 OUT_BATCH(batch, 0);
1280 OUT_BATCH(batch, 0);
1281 ADVANCE_BATCH(batch);
1283 gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1285 intel_batchbuffer_end_atomic(batch);
/* Prepare the VP8 VME stage for one frame: update the MB/MV cost tables,
 * set up surfaces, interface descriptors and constants, then program the
 * media pipeline. */
1288 static VAStatus gen9_vme_vp8_prepare(VADriverContextP ctx,
1289 struct encode_state *encode_state,
1290 struct intel_encoder_context *encoder_context)
1292 VAStatus vaStatus = VA_STATUS_SUCCESS;
1293 VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
/* frame_type == 0 is treated as intra (VP8 key frame). */
1294 int is_intra = !pPicParameter->pic_flags.bits.frame_type;
1296 /* update vp8 mbmv cost */
1297 intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1299 /* Set up all the memory objects */
1300 gen9_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
1301 gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1302 gen9_vme_constant_setup(ctx, encode_state, encoder_context);
1304 /* Program the media pipeline */
1305 gen9_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
/* Top-level VP8 VME pipeline entry point: init, prepare, run, stop. */
1311 gen9_vme_vp8_pipeline(VADriverContextP ctx,
1313 struct encode_state *encode_state,
1314 struct intel_encoder_context *encoder_context)
1316 gen9_vme_media_init(ctx, encoder_context);
1317 gen9_vme_vp8_prepare(ctx, encode_state, encoder_context);
1318 gen9_vme_run(ctx, encode_state, encoder_context);
1319 gen9_vme_stop(ctx, encode_state, encoder_context);
1321 return VA_STATUS_SUCCESS;
/* Allocate and bind the HEVC VME output buffer: one block per 16x16 MB.
 * Intra slices need 2x INTRA_VME_OUTPUT_IN_BYTES per MB; inter slices
 * need 24x (see the record breakdown in the comment below). */
1327 gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
1328 struct encode_state *encode_state,
1330 struct intel_encoder_context *encoder_context)
1333 struct i965_driver_data *i965 = i965_driver_data(ctx);
1334 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1335 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1336 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1337 int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
/* HEVC luma dimensions are rounded up to whole 16x16 macroblocks. */
1338 int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1339 int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1342 vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
1343 vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
1346 vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
1348 vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
1350 * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
1351 * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
1352 * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
1355 vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
1356 "VME output buffer",
1357 vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
1359 assert(vme_context->vme_output.bo);
/* Publish the buffer in the binding table / surface-state heap. */
1360 vme_context->vme_buffer_suface_setup(ctx,
1361 &vme_context->gpe_context,
1362 &vme_context->vme_output,
1363 BINDING_TABLE_OFFSET(index),
1364 SURFACE_STATE_OFFSET(index));
/* Allocate the HEVC second-level (VME) batch buffer: one 64-byte slot per
 * 16x16 MB, plus one extra slot — presumably for the trailing
 * MI_BATCH_BUFFER_END emitted by the fill routines (confirm). */
1368 gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
1369 struct encode_state *encode_state,
1371 struct intel_encoder_context *encoder_context)
1374 struct i965_driver_data *i965 = i965_driver_data(ctx);
1375 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1376 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1377 int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1378 int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1380 vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
1381 vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
1382 vme_context->vme_batchbuffer.pitch = 16;
1383 vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
1385 vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
/* Populate the binding table for the HEVC VME kernels:
 *   0    - current picture (VME source view)
 *   4, 6 - current picture luma / chroma media views
 *   1, 2 - list 0 / list 1 reference pictures (via reference-state helper)
 *   3    - VME output buffer
 *   5    - second-level batch buffer */
1389 gen9_vme_hevc_surface_setup(VADriverContextP ctx,
1390 struct encode_state *encode_state,
1392 struct intel_encoder_context *encoder_context)
1394 struct object_surface *obj_surface;
1396 /* Set up surfaces state */
1397 /* current picture for encoding */
1398 obj_surface = encode_state->input_yuv_object;
1399 gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1400 gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1401 gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
/* Reference setup — NOTE(review): the assert below implies this region is
 * only reached for non-intra slices; the guard appears to be in the
 * elided lines — confirm. */
1404 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1407 slice_type = slice_param->slice_type;
1408 assert(slice_type != HEVC_SLICE_I);
/* List 0 reference; B slices additionally bind a list 1 reference. */
1411 intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
1413 if (slice_type == HEVC_SLICE_B)
1414 intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
1418 gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
1419 gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1421 return VA_STATUS_SUCCESS;
/* Fill the HEVC second-level batch with MEDIA_OBJECT commands in a
 * staggered walker order so the hardware scoreboard can resolve
 * neighbour dependencies (left/A, top/B, top-right/C, top-left/D).
 * Slice addressing is converted from CTU units to 16x16 MB units via
 * num_mb_in_ctb.  The grid is walked in two passes: the main region up
 * to mb_width-2 columns, then the right-edge remainder.
 * NOTE(review): intermediate source lines are elided in this listing. */
1424 gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1425 struct encode_state *encode_state,
1426 int mb_width, int mb_height,
1428 int transform_8x8_mode_flag,
1429 struct intel_encoder_context *encoder_context)
1431 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1434 unsigned int *command_ptr;
1435 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
/* Derive the CTB size from the sequence parameters and convert it to a
 * count of 16x16 MBs per CTB (squared because CTBs are two-dimensional). */
1436 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1437 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1438 int ctb_size = 1 << log2_ctb_size;
1439 int num_mb_in_ctb = (ctb_size + 15)/16;
1440 num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
1442 #define USE_SCOREBOARD (1 << 21)
1444 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1445 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1447 /* slice_segment_address must be aligned to picture_width_in_ctb */
1448 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1449 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
1450 int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb;
1451 int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
1452 unsigned int mb_intra_ub, score_dep;
1453 int x_outer, y_outer, x_inner, y_inner;
1454 int xtemp_outer = 0;
1456 x_outer = first_mb % mb_width;
1457 y_outer = first_mb / mb_width;
/* Pass 1: walk the main region (columns < mb_width - 2). */
1460 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1463 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Accumulate intra-availability flags and the matching scoreboard
 * dependency mask for the left / top / top-right / top-left neighbours. */
1467 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1468 score_dep |= MB_SCOREBOARD_A;
1470 if (y_inner != mb_row) {
1471 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1472 score_dep |= MB_SCOREBOARD_B;
1474 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1475 if (x_inner != (mb_width -1)) {
1476 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1477 score_dep |= MB_SCOREBOARD_C;
/* Emit one 8-dword MEDIA_OBJECT per MB. */
1481 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1482 *command_ptr++ = kernel;
1483 *command_ptr++ = USE_SCOREBOARD;
1486 /* the (X, Y) term of scoreboard */
1487 *command_ptr++ = ((y_inner << 16) | x_inner);
1488 *command_ptr++ = score_dep;
/* Inline payload: MB position/picture width and kernel control flags. */
1490 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1491 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1492 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
/* Pass 2: walk the right-edge remainder (last two columns). */
1501 xtemp_outer = mb_width - 2;
1502 if (xtemp_outer < 0)
1504 x_outer = xtemp_outer;
1505 y_outer = first_mb / mb_width;
1506 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1509 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1513 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1514 score_dep |= MB_SCOREBOARD_A;
1516 if (y_inner != mb_row) {
1517 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1518 score_dep |= MB_SCOREBOARD_B;
1520 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1522 if (x_inner != (mb_width -1)) {
1523 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1524 score_dep |= MB_SCOREBOARD_C;
1528 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1529 *command_ptr++ = kernel;
1530 *command_ptr++ = USE_SCOREBOARD;
1533 /* the (X, Y) term of scoreboard */
1534 *command_ptr++ = ((y_inner << 16) | x_inner);
1535 *command_ptr++ = score_dep;
1537 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1538 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1540 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1546 if (x_outer >= mb_width) {
1548 x_outer = xtemp_outer;
/* Terminate the second-level batch. */
1553 *command_ptr++ = MI_BATCH_BUFFER_END;
1556 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Linear (non-scoreboard) fill of the HEVC second-level batch: emit one
 * MEDIA_OBJECT per 16x16 MB in raster order within each slice.  Used when
 * the hardware-scoreboard walker path is not allowed (e.g. slices that do
 * not start at a row boundary).
 * NOTE(review): intermediate source lines are elided in this listing. */
1560 gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
1561 struct encode_state *encode_state,
1562 int mb_width, int mb_height,
1564 int transform_8x8_mode_flag,
1565 struct intel_encoder_context *encoder_context)
1567 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1568 int mb_x = 0, mb_y = 0;
1570 unsigned int *command_ptr;
1571 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
/* Convert CTB size into a squared count of 16x16 MBs per CTB. */
1572 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1573 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1575 int ctb_size = 1 << log2_ctb_size;
1576 int num_mb_in_ctb = (ctb_size + 15)/16;
1577 num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
1579 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1580 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1582 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1583 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
/* Slice extent converted from CTU units to MB units. */
1584 int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
1585 int slice_mb_number = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
1587 unsigned int mb_intra_ub;
1588 int slice_mb_x = slice_mb_begin % mb_width;
1589 for (i = 0; i < slice_mb_number; ) {
1590 int mb_count = i + slice_mb_begin;
1591 mb_x = mb_count % mb_width;
1592 mb_y = mb_count / mb_width;
/* Default neighbour availability from the MB's grid position. */
1596 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1599 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1601 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1602 if (mb_x != (mb_width -1))
1603 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
/* For slices that start mid-row, mask off neighbours that lie outside
 * the slice — NOTE(review): the enclosing condition lines are elided;
 * confirm which MBs these adjustments apply to. */
1607 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
1608 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
1609 if ((i == (mb_width - 1)) && slice_mb_x) {
1610 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1614 if ((i == mb_width) && slice_mb_x) {
1615 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
/* Emit one 8-dword MEDIA_OBJECT per MB (no scoreboard dwords used). */
1618 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1619 *command_ptr++ = kernel;
1626 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
1627 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1629 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
/* Terminate the second-level batch. */
1635 *command_ptr++ = MI_BATCH_BUFFER_END;
1638 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Program the HEVC VME media pipeline: pick the kernel per slice type,
 * choose between the scoreboard-walker and linear batch-fill paths
 * (hardware scoreboard is only allowed when every slice starts at a row
 * boundary), then chain into the second-level batch.
 * NOTE(review): intermediate source lines are elided in this listing. */
1641 static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx,
1642 struct encode_state *encode_state,
1643 struct intel_encoder_context *encoder_context)
1645 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1646 struct intel_batchbuffer *batch = encoder_context->base.batch;
1647 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1648 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1649 int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1650 int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1652 bool allow_hwscore = true;
1655 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1656 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1658 int ctb_size = 1 << log2_ctb_size;
1659 int num_mb_in_ctb = (ctb_size + 15)/16;
1660 int transform_8x8_mode_flag = 1;
1661 num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
/* The scoreboard walker requires every slice to begin on an MB row. */
1663 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1664 pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
1665 int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
1666 if ((slice_mb_begin % width_in_mbs)) {
1667 allow_hwscore = false;
/* Select the VME kernel from the slice type (I/P/B). */
1672 if (pSliceParameter->slice_type == HEVC_SLICE_I) {
1673 kernel_shader = VME_INTRA_SHADER;
1674 } else if (pSliceParameter->slice_type == HEVC_SLICE_P) {
1675 kernel_shader = VME_INTER_SHADER;
1677 kernel_shader = VME_BINTER_SHADER;
1679 kernel_shader = VME_INTER_SHADER;
1682 gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
1684 width_in_mbs, height_in_mbs,
1686 transform_8x8_mode_flag,
1689 gen9_vme_hevc_fill_vme_batchbuffer(ctx,
1691 width_in_mbs, height_in_mbs,
1693 transform_8x8_mode_flag,
1696 intel_batchbuffer_start_atomic(batch, 0x1000);
1697 gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
/* Chain into the second-level VME batch buffer.
 * NOTE(review): this path emits a 3-dword MI_BATCH_BUFFER_START while the
 * MPEG-2/VP8 paths emit 4 dwords — confirm intentional (lines elided). */
1698 BEGIN_BATCH(batch, 3);
1699 OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1701 vme_context->vme_batchbuffer.bo,
1702 I915_GEM_DOMAIN_COMMAND, 0,
1704 OUT_BATCH(batch, 0);
1705 ADVANCE_BATCH(batch);
1707 gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1709 intel_batchbuffer_end_atomic(batch);
/* Prepare the HEVC VME stage for one frame: refresh the cached level when
 * general_level_idc changes, update MB/MV cost tables, set up surfaces,
 * interface descriptors and constants, then program the media pipeline.
 * NOTE(review): intermediate source lines are elided in this listing. */
1712 static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx,
1713 struct encode_state *encode_state,
1714 struct intel_encoder_context *encoder_context)
1716 VAStatus vaStatus = VA_STATUS_SUCCESS;
1717 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1718 int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
1719 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1720 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1722 /* here use the avc level for hevc vme */
1723 if (!vme_context->hevc_level ||
1724 (vme_context->hevc_level != pSequenceParameter->general_level_idc)) {
1725 vme_context->hevc_level = pSequenceParameter->general_level_idc;
1728 intel_vme_hevc_update_mbmv_cost(ctx, encode_state, encoder_context);
1730 /* Set up all the memory objects */
1731 gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context);
1732 gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1733 //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
1734 gen9_vme_constant_setup(ctx, encode_state, encoder_context);
1736 /* Program the media pipeline */
1737 gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context);
/* Top-level HEVC VME pipeline entry point: init, prepare, run, stop. */
1744 gen9_vme_hevc_pipeline(VADriverContextP ctx,
1746 struct encode_state *encode_state,
1747 struct intel_encoder_context *encoder_context)
1749 gen9_vme_media_init(ctx, encoder_context);
1750 gen9_vme_hevc_prepare(ctx, encode_state, encoder_context);
1751 gen9_vme_run(ctx, encode_state, encoder_context);
1752 gen9_vme_stop(ctx, encode_state, encoder_context);
1754 return VA_STATUS_SUCCESS;
/* Destructor for the VME context: tear down the GPE context, drop all
 * buffer-object references, and free heap allocations.  Pointers are
 * NULLed after release to guard against reuse.
 * NOTE(review): the final free of the context struct itself is in the
 * elided tail of this function. */
1759 gen9_vme_context_destroy(void *context)
1761 struct gen6_vme_context *vme_context = context;
1763 gen8_gpe_context_destroy(&vme_context->gpe_context);
1765 dri_bo_unreference(vme_context->vme_output.bo);
1766 vme_context->vme_output.bo = NULL;
1768 dri_bo_unreference(vme_context->vme_state.bo);
1769 vme_context->vme_state.bo = NULL;
1771 dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1772 vme_context->vme_batchbuffer.bo = NULL;
/* The NULL guard is redundant (free(NULL) is a no-op) but harmless. */
1774 if (vme_context->vme_state_message) {
1775 free(vme_context->vme_state_message);
1776 vme_context->vme_state_message = NULL;
1779 free(vme_context->qp_per_mb);
1780 vme_context->qp_per_mb = NULL;
1785 Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1787 struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1788 struct i965_kernel *vme_kernel_list = NULL;
1789 int i965_kernel_num;
1791 switch (encoder_context->codec) {
1793 case CODEC_H264_MVC:
1794 vme_kernel_list = gen9_vme_kernels;
1795 encoder_context->vme_pipeline = gen9_vme_pipeline;
1796 i965_kernel_num = sizeof(gen9_vme_kernels) / sizeof(struct i965_kernel);
1800 vme_kernel_list = gen9_vme_mpeg2_kernels;
1801 encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
1802 i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
1806 vme_kernel_list = gen9_vme_vp8_kernels;
1807 encoder_context->vme_pipeline = gen9_vme_vp8_pipeline;
1808 i965_kernel_num = sizeof(gen9_vme_vp8_kernels) / sizeof(struct i965_kernel);
1812 vme_kernel_list = gen9_vme_hevc_kernels;
1813 encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
1814 i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / sizeof(struct i965_kernel);
1818 /* never get here */
1824 assert(vme_context);
1825 vme_context->vme_kernel_sum = i965_kernel_num;
1826 vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1828 vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
1829 vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
1830 vme_context->gpe_context.sampler_size = 0;
1833 vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1834 vme_context->gpe_context.vfe_state.num_urb_entries = 64;
1835 vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1836 vme_context->gpe_context.vfe_state.urb_entry_size = 16;
1837 vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1839 gen7_vme_scoreboard_init(ctx, vme_context);
1841 gen8_gpe_load_kernels(ctx,
1842 &vme_context->gpe_context,
1845 vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
1846 vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
1847 vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
1848 vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
1850 encoder_context->vme_context = vme_context;
1851 encoder_context->vme_context_destroy = gen9_vme_context_destroy;
1853 vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));