2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Qu Pengfei <Pengfei.Qu@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
43 #include "intel_media.h"
/* BRC (bit-rate control) status codes and tuning constants.
 * NOTE(review): interior lines of this chunk are missing from the extract
 * (the leading integers are residual original line numbers); all tokens are
 * kept byte-identical and only comments are added. */
45 typedef enum _gen6_brc_status {
46 BRC_NO_HRD_VIOLATION = 0,
49 BRC_UNDERFLOW_WITH_MAX_QP = 3,
50 BRC_OVERFLOW_WITH_MIN_QP = 4,
/* Clamp x into [min, max]; x may be evaluated more than once (macro hazard). */
54 #define BRC_CLIP(x, min, max) \
56 x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \
/* Fixed QP offsets between slice types: B = P + 4, P = I + 2, so B = I + 6. */
59 #define BRC_P_B_QP_DIFF 4
60 #define BRC_I_P_QP_DIFF 2
61 #define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
63 #define BRC_PWEIGHT 0.6 /* weight if P slice with comparison to I slice */
64 #define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */
66 #define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
67 #define BRC_CY 0.1 /* weight for */
68 #define BRC_CX_UNDERFLOW 5.
69 #define BRC_CX_OVERFLOW -4.
/* Numeric value is pi/2; the name reads as "0.5 * pi". */
71 #define BRC_PI_0_5 1.5707963267948966192313216916398
73 /* intel buffer write */
/* (Re)allocate a named GEM buffer object of the given size, releasing any
 * previously held bo first. Relies on 'i965' being in scope at the use site. */
74 #define ALLOC_ENCODER_BUFFER(gen_buffer, string, size) do { \
75 dri_bo_unreference(gen_buffer->bo); \
76 gen_buffer->bo = dri_bo_alloc(i965->intel.bufmgr, \
80 assert(gen_buffer->bo); \
/* Emit a buffer address into the BCS batch: a relocation when buf_bo is
 * non-NULL (write domain set when 'is_target'), zero DWORDs otherwise.
 * 'ma' presumably selects whether a memory-address-attributes DWORD is
 * emitted -- TODO confirm; the middle of this macro is missing here. */
84 #define OUT_BUFFER_X(buf_bo, is_target, ma) do { \
86 OUT_BCS_RELOC(batch, \
88 I915_GEM_DOMAIN_INSTRUCTION, \
89 is_target ? I915_GEM_DOMAIN_INSTRUCTION : 0, \
92 OUT_BCS_BATCH(batch, 0); \
94 OUT_BCS_BATCH(batch, 0); \
96 OUT_BCS_BATCH(batch, 0); \
/* Convenience wrappers: MA = with memory-address attributes, NMA = without;
 * TARGET = render target (write), REFERENCE = read-only. */
99 #define OUT_BUFFER_MA_TARGET(buf_bo) OUT_BUFFER_X(buf_bo, 1, 1)
100 #define OUT_BUFFER_MA_REFERENCE(buf_bo) OUT_BUFFER_X(buf_bo, 0, 1)
101 #define OUT_BUFFER_NMA_TARGET(buf_bo) OUT_BUFFER_X(buf_bo, 1, 0)
102 #define OUT_BUFFER_NMA_REFERENCE(buf_bo) OUT_BUFFER_X(buf_bo, 0, 0)
/* Binding-table / surface-state layout helpers (Gen8+ padded slot size). */
105 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
106 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
107 #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
109 #define HCP_SOFTWARE_SKYLAKE 1
111 #define NUM_HCPE_KERNEL 2
/* VME inter-prediction macroblock mode/shape encodings. */
113 #define INTER_MODE_MASK 0x03
114 #define INTER_8X8 0x03
115 #define INTER_16X8 0x01
116 #define INTER_8X16 0x02
117 #define SUBMB_SHAPE_MASK 0x00FF00
119 #define INTER_MV8 (4 << 20)
120 #define INTER_MV32 (6 << 20)
/* Fill every HEVC quantization-matrix list with a flat value of 16
 * (i.e. no frequency-dependent weighting): the 4x4/8x8/16x16/32x32
 * scaling lists plus the 16x16 and 32x32 DC coefficients. */
127 hevc_gen_default_iq_matrix_encoder(VAQMatrixBufferHEVC *iq_matrix)
130 memset(&iq_matrix->scaling_lists_4x4, 16, sizeof(iq_matrix->scaling_lists_4x4));
133 memset(&iq_matrix->scaling_lists_8x8, 16, sizeof(iq_matrix->scaling_lists_8x8));
136 memset(&iq_matrix->scaling_lists_16x16, 16, sizeof(iq_matrix->scaling_lists_16x16));
139 memset(&iq_matrix->scaling_lists_32x32, 16, sizeof(iq_matrix->scaling_lists_32x32));
141 /* Flat_16x16_dc_16 */
142 memset(&iq_matrix->scaling_list_dc_16x16, 16, sizeof(iq_matrix->scaling_list_dc_16x16));
144 /* Flat_32x32_dc_16 */
145 memset(&iq_matrix->scaling_list_dc_32x32, 16, sizeof(iq_matrix->scaling_list_dc_32x32));
148 /* HEVC picture and slice state related */
/* Program HCP_PIPE_MODE_SELECT for HEVC encoding: selects the codec and
 * encode direction. KBL emits a 6-DWORD command, earlier Gen9 (SKL/BXT)
 * a 4-DWORD one. Only HCP_CODEC_HEVC is accepted ('standard_select' is a
 * parameter whose declaration line is missing from this extract). */
151 gen9_hcpe_pipe_mode_select(VADriverContextP ctx,
153 struct intel_encoder_context *encoder_context)
155 struct i965_driver_data *i965 = i965_driver_data(ctx);
156 struct intel_batchbuffer *batch = encoder_context->base.batch;
158 assert(standard_select == HCP_CODEC_HEVC);
160 if(IS_KBL(i965->intel.device_info))
162 BEGIN_BCS_BATCH(batch, 6);
164 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
168 BEGIN_BCS_BATCH(batch, 4);
170 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
/* DW1: codec select (encode) + standard; status/error reporting disabled. */
174 (standard_select << 5) |
175 (0 << 3) | /* disable Pic Status / Error Report */
176 HCP_CODEC_SELECT_ENCODE);
177 OUT_BCS_BATCH(batch, 0);
178 OUT_BCS_BATCH(batch, 0);
/* KBL-only: two extra reserved DWORDs to pad the longer command. */
180 if(IS_KBL(i965->intel.device_info))
182 OUT_BCS_BATCH(batch, 0);
183 OUT_BCS_BATCH(batch, 0);
186 ADVANCE_BCS_BATCH(batch);
/* Emit two HCP_SURFACE_STATE commands: one for the reconstructed picture
 * (surface id 1) and one for the source picture (surface id 0). When the
 * sequence uses >8-bit luma or chroma, the reconstructed surface must be
 * P010 and the P010 surface format is selected. */
190 gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state,
191 struct intel_encoder_context *encoder_context)
193 struct intel_batchbuffer *batch = encoder_context->base.batch;
194 struct object_surface *obj_surface = encode_state->reconstructed_object;
195 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
196 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
197 unsigned int surface_format = SURFACE_FORMAT_PLANAR_420_8;
200 unsigned int y_cb_offset;
/* 10-bit content: require a P010 reconstructed surface. */
204 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
205 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
207 assert(obj_surface->fourcc == VA_FOURCC_P010);
208 surface_format = SURFACE_FORMAT_P010;
211 y_cb_offset = obj_surface->y_cb_offset;
/* Reconstructed (decoded) picture surface. */
213 BEGIN_BCS_BATCH(batch, 3);
214 OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
216 (1 << 28) | /* surface id */
217 (mfc_context->surface_state.w_pitch - 1)); /* pitch - 1 */
219 surface_format << 28 |
221 ADVANCE_BCS_BATCH(batch);
/* Source (input) picture surface. */
223 BEGIN_BCS_BATCH(batch, 3);
224 OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
226 (0 << 28) | /* surface id */
227 (mfc_context->surface_state.w_pitch - 1)); /* pitch - 1 */
229 surface_format << 28 |
231 ADVANCE_BCS_BATCH(batch);
/* Program HCP_PIPE_BUF_ADDR_STATE: hands the HCP all working buffers --
 * reconstructed output, deblocking/metadata/SAO row stores, the MV temporal
 * buffer, up to 8 reference surfaces, the uncompressed source, and the
 * collocated-MV buffers. KBL uses the longer 104-DWORD form; SKL/BXT 95. */
235 gen9_hcpe_pipe_buf_addr_state(VADriverContextP ctx, struct encode_state *encode_state,
236 struct intel_encoder_context *encoder_context)
238 struct i965_driver_data *i965 = i965_driver_data(ctx);
239 struct intel_batchbuffer *batch = encoder_context->base.batch;
240 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
241 struct object_surface *obj_surface;
242 GenHevcSurface *hcpe_hevc_surface;
246 if(IS_KBL(i965->intel.device_info))
248 BEGIN_BCS_BATCH(batch, 104);
250 OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
254 BEGIN_BCS_BATCH(batch, 95);
256 OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));
/* The reconstructed surface must exist and carry its per-surface HEVC
 * private data (which owns the MV temporal bo). */
259 obj_surface = encode_state->reconstructed_object;
260 assert(obj_surface && obj_surface->bo);
261 hcpe_hevc_surface = obj_surface->private_data;
262 assert(hcpe_hevc_surface && hcpe_hevc_surface->motion_vector_temporal_bo);
264 OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
265 OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
266 OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
267 OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
268 OUT_BUFFER_MA_TARGET(mfc_context->metadata_line_buffer.bo); /* DW 13..15 */
269 OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_line_buffer.bo); /* DW 16..18 */
270 OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_column_buffer.bo); /* DW 19..21 */
271 OUT_BUFFER_MA_TARGET(mfc_context->sao_line_buffer.bo); /* DW 22..24 */
272 OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_line_buffer.bo); /* DW 25..27 */
273 OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_column_buffer.bo); /* DW 28..30 */
274 OUT_BUFFER_MA_TARGET(hcpe_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
275 OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */
277 /* here only max 8 reference allowed */
278 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
279 bo = mfc_context->reference_surfaces[i].bo;
282 OUT_BUFFER_NMA_REFERENCE(bo);
284 OUT_BUFFER_NMA_REFERENCE(NULL);
286 OUT_BCS_BATCH(batch, 0); /* DW 53, memory address attributes */
288 OUT_BUFFER_MA_TARGET(mfc_context->uncompressed_picture_source.bo); /* DW 54..56, uncompressed picture source */
289 OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, ignore */
290 OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, ignore */
291 OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, ignore */
/* Collocated MV buffers for temporal MV prediction (all but the last slot). */
293 for (i = 0; i < ARRAY_ELEMS(mfc_context->current_collocated_mv_temporal_buffer) - 1; i++) {
294 bo = mfc_context->current_collocated_mv_temporal_buffer[i].bo;
297 OUT_BUFFER_NMA_REFERENCE(bo);
299 OUT_BUFFER_NMA_REFERENCE(NULL);
301 OUT_BCS_BATCH(batch, 0); /* DW 82, memory address attributes */
303 OUT_BUFFER_MA_TARGET(NULL); /* DW 83..85, ignore for HEVC */
304 OUT_BUFFER_MA_TARGET(NULL); /* DW 86..88, ignore for HEVC */
305 OUT_BUFFER_MA_TARGET(NULL); /* DW 89..91, ignore for HEVC */
306 OUT_BUFFER_MA_TARGET(NULL); /* DW 92..94, ignore for HEVC */
/* KBL-only tail padding for the longer command form. */
308 if(IS_KBL(i965->intel.device_info))
311 OUT_BCS_BATCH(batch, 0);
314 ADVANCE_BCS_BATCH(batch);
/* Program HCP_IND_OBJ_BASE_ADDR_STATE: points the HCP at the indirect CU
 * object buffer and the PAK-BSE output buffer (base address plus its upper
 * bound at end_offset). The decoder-only indirect bitstream fields are
 * emitted as NULL/zero. */
318 gen9_hcpe_ind_obj_base_addr_state(VADriverContextP ctx,
319 struct intel_encoder_context *encoder_context)
321 struct intel_batchbuffer *batch = encoder_context->base.batch;
322 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
325 BEGIN_BCS_BATCH(batch, 14);
327 OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
328 OUT_BUFFER_MA_REFERENCE(NULL); /* DW 1..3 igonre for encoder*/
329 OUT_BUFFER_NMA_REFERENCE(NULL); /* DW 4..5, Upper Bound */
330 OUT_BUFFER_MA_TARGET(mfc_context->hcp_indirect_cu_object.bo); /* DW 6..8, CU */
331 /* DW 9..11, PAK-BSE */
/* Relocation for the coded-bitstream output at its start offset... */
333 mfc_context->hcp_indirect_pak_bse_object.bo,
334 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
335 mfc_context->hcp_indirect_pak_bse_object.offset);
336 OUT_BCS_BATCH(batch, 0);
337 OUT_BCS_BATCH(batch, 0);
/* ...and a second relocation for its upper bound (end_offset). */
339 mfc_context->hcp_indirect_pak_bse_object.bo,
340 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
341 mfc_context->hcp_indirect_pak_bse_object.end_offset);
342 OUT_BCS_BATCH(batch, 0);
344 ADVANCE_BCS_BATCH(batch);
/* Emit one HCP_FQM_STATE command (forward quantizer matrix). The caller
 * supplies the matrix as up to 32 DWORDs ('fqm'/'fqm_length' parameters --
 * their declaration lines are missing from this extract); the payload is
 * zero-padded to the full 32-DWORD size. */
348 gen9_hcpe_fqm_state(VADriverContextP ctx,
355 struct intel_encoder_context *encoder_context)
357 struct intel_batchbuffer *batch = encoder_context->base.batch;
358 unsigned int fqm_buffer[32];
360 assert(fqm_length <= 32);
361 assert(sizeof(*fqm) == 4);
362 memset(fqm_buffer, 0, sizeof(fqm_buffer));
363 memcpy(fqm_buffer, fqm, fqm_length * 4);
365 BEGIN_BCS_BATCH(batch, 34);
367 OUT_BCS_BATCH(batch, HCP_FQM_STATE | (34 - 2));
/* DW1: matrix selector -- color component, size id, intra/inter. */
370 color_component << 3 |
373 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
375 ADVANCE_BCS_BATCH(batch);
/* Load flat forward-quantization matrices for every HEVC matrix slot by
 * issuing gen9_hcpe_fqm_state() once per (size, intra/inter, component)
 * combination. Each 16-bit entry is 0x1000 -- the flat-matrix reciprocal
 * scale, presumably fixed-point; TODO confirm format against the PRM. */
380 gen9_hcpe_hevc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
382 unsigned int qm[32] = {
383 0x10001000, 0x10001000, 0x10001000, 0x10001000,
384 0x10001000, 0x10001000, 0x10001000, 0x10001000,
385 0x10001000, 0x10001000, 0x10001000, 0x10001000,
386 0x10001000, 0x10001000, 0x10001000, 0x10001000,
387 0x10001000, 0x10001000, 0x10001000, 0x10001000,
388 0x10001000, 0x10001000, 0x10001000, 0x10001000,
389 0x10001000, 0x10001000, 0x10001000, 0x10001000,
390 0x10001000, 0x10001000, 0x10001000, 0x10001000
/* One call per matrix slot; the argument lists are missing from this
 * extract, so the exact (size_id, component, intra/inter) ordering is
 * not visible here. */
393 gen9_hcpe_fqm_state(ctx,
397 gen9_hcpe_fqm_state(ctx,
401 gen9_hcpe_fqm_state(ctx,
405 gen9_hcpe_fqm_state(ctx,
409 gen9_hcpe_fqm_state(ctx,
413 gen9_hcpe_fqm_state(ctx,
417 gen9_hcpe_fqm_state(ctx,
421 gen9_hcpe_fqm_state(ctx,
/* Emit one HCP_QM_STATE command (dequantizer matrix). The caller supplies
 * the matrix as up to 16 DWORDs ('qm'/'qm_length' parameters -- their
 * declaration lines are missing from this extract); the payload is
 * zero-padded to the full 16-DWORD size. */
428 gen9_hcpe_qm_state(VADriverContextP ctx,
435 struct intel_encoder_context *encoder_context)
437 struct intel_batchbuffer *batch = encoder_context->base.batch;
438 unsigned int qm_buffer[16];
440 assert(qm_length <= 16);
441 assert(sizeof(*qm) == 4);
442 memset(qm_buffer, 0, sizeof(qm_buffer));
443 memcpy(qm_buffer, qm, qm_length * 4);
445 BEGIN_BCS_BATCH(batch, 18);
447 OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
/* DW1: matrix selector -- color component, size id, intra/inter. */
450 color_component << 3 |
453 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
455 ADVANCE_BCS_BATCH(batch);
/* Load flat dequantization matrices (every byte 0x10 == 16) for all HEVC
 * matrix slots: 6 lists each for the 4x4/8x8/16x16 sizes and 2 for 32x32,
 * via repeated gen9_hcpe_qm_state() calls. */
459 gen9_hcpe_hevc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
464 unsigned int qm[16] = {
465 0x10101010, 0x10101010, 0x10101010, 0x10101010,
466 0x10101010, 0x10101010, 0x10101010, 0x10101010,
467 0x10101010, 0x10101010, 0x10101010, 0x10101010,
468 0x10101010, 0x10101010, 0x10101010, 0x10101010
/* 4x4 lists (argument lists are missing from this extract). */
471 for (i = 0; i < 6; i++) {
472 gen9_hcpe_qm_state(ctx,
/* 8x8 lists. */
478 for (i = 0; i < 6; i++) {
479 gen9_hcpe_qm_state(ctx,
/* 16x16 lists. */
485 for (i = 0; i < 6; i++) {
486 gen9_hcpe_qm_state(ctx,
/* 32x32 lists (intra + inter only). */
492 for (i = 0; i < 2; i++) {
493 gen9_hcpe_qm_state(ctx,
/* Program HCP_PIC_STATE from the VA sequence/picture parameters: picture
 * dimensions in min-CBs, CB/TB size ranges, PCM settings, per-picture
 * control flags, bit depths, QP offsets, and the PAK frame/LCU bit-budget
 * fields used for rate control. KBL uses the 31-DWORD form, SKL/BXT 19. */
501 gen9_hcpe_hevc_pic_state(VADriverContextP ctx, struct encode_state *encode_state,
502 struct intel_encoder_context *encoder_context)
504 struct i965_driver_data *i965 = i965_driver_data(ctx);
505 struct intel_batchbuffer *batch = encoder_context->base.batch;
506 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
507 VAEncPictureParameterBufferHEVC *pic_param ;
508 VAEncSequenceParameterBufferHEVC *seq_param ;
510 int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
511 int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
515 * When not present, the value of loop_filter_across_tiles_enabled_flag
516 * is inferred to be equal to 1.
518 int loop_filter_across_tiles_enabled_flag = 0;
519 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
520 seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
522 int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
523 int log2_ctb_size = seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
524 int ctb_size = 1 << log2_ctb_size;
/* Rate-control scaffolding: raw bits per CTU (4:2:0, 8 bpp assumed) and
 * the per-CTU / per-frame bit caps derived from the target bitrate. */
525 double rawctubits = 8 * 3 * ctb_size * ctb_size / 2.0;
526 int maxctubits = (int)(5 * rawctubits / 3) ;
527 double bitrate = seq_param->bits_per_second * 1.0;
528 double framebitrate = bitrate / 32 / 8; //32 byte unit
529 int minframebitrate = 0;//(int) (framebitrate * 3 / 10);
530 int maxframebitrate = (int)(framebitrate * 10 / 10);
531 int maxdeltaframebitrate = 0x1c5c; //(int) (framebitrate * 1/ 10);
532 int mindeltaframebitrate = 0; //(int) (framebitrate * 1/ 10);
533 int minframesize = 0;//(int)(rawframebits * 1/50);
535 if (seq_param->seq_fields.bits.pcm_enabled_flag) {
536 max_pcm_size_minus3 = seq_param->log2_max_pcm_luma_coding_block_size_minus3;
537 min_pcm_size_minus3 = seq_param->log2_min_pcm_luma_coding_block_size_minus3;
538 pcm_sample_bit_depth_luma_minus1 = (seq_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
539 pcm_sample_bit_depth_chroma_minus1 = (seq_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
541 max_pcm_size_minus3 = MIN(seq_param->log2_min_luma_coding_block_size_minus3 + seq_param->log2_diff_max_min_luma_coding_block_size, 2);
544 if (pic_param->pic_fields.bits.tiles_enabled_flag)
545 loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;
547 /* set zero for encoder */
548 loop_filter_across_tiles_enabled_flag = 0;
550 if(IS_KBL(i965->intel.device_info))
552 BEGIN_BCS_BATCH(batch, 31);
554 OUT_BCS_BATCH(batch, HCP_PIC_STATE | (31 - 2));
558 BEGIN_BCS_BATCH(batch, 19);
560 OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));
/* DW1: frame size in units of min coding blocks. */
564 mfc_context->pic_size.picture_height_in_min_cb_minus1 << 16 |
566 mfc_context->pic_size.picture_width_in_min_cb_minus1);
/* DW2: PCM size range and min/max transform- and coding-block sizes. */
568 max_pcm_size_minus3 << 10 |
569 min_pcm_size_minus3 << 8 |
570 (seq_param->log2_min_transform_block_size_minus2 +
571 seq_param->log2_diff_max_min_transform_block_size) << 6 |
572 seq_param->log2_min_transform_block_size_minus2 << 4 |
573 (seq_param->log2_min_luma_coding_block_size_minus3 +
574 seq_param->log2_diff_max_min_luma_coding_block_size) << 2 |
575 seq_param->log2_min_luma_coding_block_size_minus3);
576 OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
/* DW4: per-picture coding-tool flags. */
578 (IS_KBL(i965->intel.device_info)? 1 : 0) << 27 | /* CU packet structure is 0 for SKL */
579 seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
580 pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
581 seq_param->seq_fields.bits.amp_enabled_flag << 23 |
582 pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
583 0 << 21 | /* 0 for encoder !(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD)*/
584 0 << 20 | /* 0 for encoder !!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_FIELD_PIC)*/
585 pic_param->pic_fields.bits.weighted_pred_flag << 19 |
586 pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
587 pic_param->pic_fields.bits.tiles_enabled_flag << 17 | /* 0 for encoder */
588 pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
589 loop_filter_across_tiles_enabled_flag << 15 |
590 pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 | /* 0 for encoder */
591 pic_param->log2_parallel_merge_level_minus2 << 10 | /* 0 for encoder */
592 pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 | /* 0 for encoder */
593 seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8 |
594 (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 | /* 0 for encoder */
595 pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 | /* 0 for encoder */
596 seq_param->seq_fields.bits.pcm_enabled_flag << 4 |
597 seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag << 3 | /* 0 for encoder */
/* DW5: bit depths, PCM sample depths, transform hierarchy, chroma QP offsets. */
600 seq_param->seq_fields.bits.bit_depth_luma_minus8 << 27 | /* 10 bit for KBL+*/
601 seq_param->seq_fields.bits.bit_depth_chroma_minus8 << 24 | /* 10 bit for KBL+ */
602 pcm_sample_bit_depth_luma_minus1 << 20 |
603 pcm_sample_bit_depth_chroma_minus1 << 16 |
604 seq_param->max_transform_hierarchy_depth_inter << 13 | /* for encoder */
605 seq_param->max_transform_hierarchy_depth_intra << 10 | /* for encoder */
606 (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
607 (pic_param->pps_cb_qp_offset & 0x1f));
609 0 << 29 | /* must be 0 for encoder */
610 maxctubits); /* DW 6, max LCU bit size allowed for encoder */
612 0 << 31 | /* frame bitrate max unit */
613 maxframebitrate); /* DW 7, frame bitrate max 0:13 */
615 0 << 31 | /* frame bitrate min unit */
616 minframebitrate); /* DW 8, frame bitrate min 0:13 */
618 maxdeltaframebitrate << 16 | /* frame bitrate max delta ,help to select deltaQP of slice*/
619 mindeltaframebitrate); /* DW 9,(0,14) frame bitrate min delta ,help to select deltaQP of slice*/
620 OUT_BCS_BATCH(batch, 0x07050402); /* DW 10, frame delta qp max */
621 OUT_BCS_BATCH(batch, 0x0d0b0908);
622 OUT_BCS_BATCH(batch, 0); /* DW 12, frame delta qp min */
623 OUT_BCS_BATCH(batch, 0);
624 OUT_BCS_BATCH(batch, 0x04030200); /* DW 14, frame delta qp max range */
625 OUT_BCS_BATCH(batch, 0x100c0806); /* DW 15 */
626 OUT_BCS_BATCH(batch, 0x04030200); /* DW 16, frame delta qp min range */
627 OUT_BCS_BATCH(batch, 0x100c0806);
630 minframesize); /* DW 18, min frame size units */
/* KBL-only: 12 reserved DWORDs pad out the 31-DWORD command. */
632 if(IS_KBL(i965->intel.device_info))
636 for(i = 0;i < 12;i++)
637 OUT_BCS_BATCH(batch, 0);
640 ADVANCE_BCS_BATCH(batch);
/* Emit an HCP_INSERT_PAK_OBJECT carrying 'lenght_in_dws' DWORDs of inline
 * header/tail bitstream payload. data_bits_in_last_dw == 0 is treated as a
 * full 32-bit last DWORD. Flags select emulation-byte insertion, last-header,
 * and end-of-slice behavior. (Note: "lenght" is a pre-existing typo kept
 * as-is since it is referenced throughout the function.) */
645 gen9_hcpe_hevc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
646 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
647 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
648 struct intel_batchbuffer *batch)
/* Fall back to the encoder's main batch when none was supplied. */
651 batch = encoder_context->base.batch;
653 if (data_bits_in_last_dw == 0)
654 data_bits_in_last_dw = 32;
656 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
658 OUT_BCS_BATCH(batch, HCP_INSERT_PAK_OBJECT | (lenght_in_dws + 2 - 2));
660 (0 << 31) | /* inline payload */
661 (0 << 16) | /* always start at offset 0 */
662 (0 << 15) | /* HeaderLengthExcludeFrmSize */
663 (data_bits_in_last_dw << 8) |
664 (skip_emul_byte_count << 4) |
665 (!!emulation_flag << 3) |
666 ((!!is_last_header) << 2) |
667 ((!!is_end_of_slice) << 1) |
668 (0 << 0)); /* Reserved */
669 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
671 ADVANCE_BCS_BATCH(batch);
/* Pack one reference picture's attributes into the HCP_REF_IDX_STATE entry
 * layout: bit 6 = long-term flag, bit 5 = frame/field (top XOR bottom XOR 1),
 * bits 1..: frame store id, bit 0 = bottom-field indicator. */
676 intel_get_ref_idx_state_1(VAPictureHEVC *va_pic, unsigned int frame_store_id)
678 unsigned int is_long_term =
679 !!(va_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE);
/* Top field when the bottom-field flag is absent (note the deliberate
 * triple negation normalizing to 0/1). */
680 unsigned int is_top_field =
681 !!!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
682 unsigned int is_bottom_field =
683 !!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
685 return ((is_long_term << 6) |
686 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
687 (frame_store_id << 1) |
688 ((is_top_field ^ 1) & is_bottom_field));
/* Emit HCP_REF_IDX_STATE for one reference list ('list': 0 = L0, 1 = L1).
 * Looks up the VME-selected reference object in the DPB to recover its
 * frame index, then writes one 16-entry table: active entries carry the
 * POC delta to the current picture and reference flags, the rest are zero. */
692 gen9_hcpe_ref_idx_state_1(struct intel_batchbuffer *batch,
694 struct intel_encoder_context *encoder_context,
695 struct encode_state *encode_state)
698 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
699 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
700 uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
701 VAPictureHEVC *ref_list = (list ? slice_param->ref_pic_list1 : slice_param->ref_pic_list0);
702 struct gen6_vme_context *vme_context = encoder_context->vme_context;
703 struct object_surface *obj_surface;
/* VME stores the chosen reference index in the low byte. */
706 int ref_idx_l0 = (vme_context->ref_index_in_mb[list] & 0xff);
708 if (ref_idx_l0 > 3) {
709 WARN_ONCE("ref_idx_l0 is out of range\n");
/* Find the used reference object's slot in the DPB (frame_index). */
713 obj_surface = vme_context->used_reference_objects[list];
715 for (i = 0; i < 16; i++) {
717 obj_surface == encode_state->reference_objects[i]) {
722 if (frame_index == -1) {
723 WARN_ONCE("RefPicList 0 or 1 is not found in DPB!\n");
726 BEGIN_BCS_BATCH(batch, 18);
728 OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
730 num_ref_minus1 << 1 |
/* One DWORD per possible reference entry (16 total). */
733 for (i = 0; i < 16; i++) {
734 if (i < MIN((num_ref_minus1 + 1), 15)) {
735 VAPictureHEVC *ref_pic = &ref_list[i];
736 VAPictureHEVC *curr_pic = &pic_param->decoded_curr_pic;
739 1 << 15 | /* bottom_field_flag 0 */
740 0 << 14 | /* field_pic_flag 0 */
741 !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 | /* short term is 1 */
742 0 << 12 | /* disable WP */
743 0 << 11 | /* disable WP */
/* POC delta (current - reference), clamped to signed 8 bits. */
745 (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
747 OUT_BCS_BATCH(batch, 0);
751 ADVANCE_BCS_BATCH(batch);
/* Emit the reference-index tables required by the slice type: none for I
 * slices, L0 only for P slices, and both L0 and L1 for B slices. */
755 intel_hcpe_hevc_ref_idx_state(VADriverContextP ctx,
756 struct encode_state *encode_state,
757 struct intel_encoder_context *encoder_context
760 struct intel_batchbuffer *batch = encoder_context->base.batch;
761 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
/* I slices carry no references at all. */
763 if (slice_param->slice_type == HEVC_SLICE_I)
766 gen9_hcpe_ref_idx_state_1(batch, 0, encoder_context, encode_state);
/* P slices stop after L0; B slices also program L1. */
768 if (slice_param->slice_type == HEVC_SLICE_P)
771 gen9_hcpe_ref_idx_state_1(batch, 1, encoder_context, encode_state);
/* Program HCP_SLICE_STATE for one slice: CTB start/end positions, QP and
 * chroma QP offsets, deblocking/SAO controls, merge and weighted-prediction
 * parameters, and the PAK insertion controls. KBL emits the 11-DWORD form,
 * SKL/BXT 9. Multi-slice is only supported on CTB-row boundaries. */
775 gen9_hcpe_hevc_slice_state(VADriverContextP ctx,
776 VAEncPictureParameterBufferHEVC *pic_param,
777 VAEncSliceParameterBufferHEVC *slice_param,
778 struct encode_state *encode_state,
779 struct intel_encoder_context *encoder_context,
780 struct intel_batchbuffer *batch)
782 struct i965_driver_data *i965 = i965_driver_data(ctx);
783 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
784 int slice_type = slice_param->slice_type;
786 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
787 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
788 int ctb_size = 1 << log2_ctb_size;
789 int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
790 int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
791 int last_slice = (((slice_param->slice_segment_address + slice_param->num_ctu_in_slice) == (width_in_ctb * height_in_ctb)) ? 1 : 0);
793 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
/* Convert the linear CTB segment address into (x, y) CTB coordinates. */
795 slice_hor_pos = slice_param->slice_segment_address % width_in_ctb;
796 slice_ver_pos = slice_param->slice_segment_address / width_in_ctb;
798 next_slice_hor_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) % width_in_ctb;
799 next_slice_ver_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) / width_in_ctb;
801 /* only support multi slice begin from row start address */
802 assert((slice_param->slice_segment_address % width_in_ctb) == 0);
/* For the last slice, the "next slice" position wraps: a whole-frame
 * single slice points one row past the bottom, otherwise (0, 0). */
804 if (last_slice == 1) {
805 if (slice_param->slice_segment_address == 0) {
806 next_slice_hor_pos = 0;
807 next_slice_ver_pos = height_in_ctb;
809 next_slice_hor_pos = 0;
810 next_slice_ver_pos = 0;
814 if(IS_KBL(i965->intel.device_info))
816 BEGIN_BCS_BATCH(batch, 11);
818 OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (11 - 2));
822 BEGIN_BCS_BATCH(batch, 9);
824 OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));
/* DW1: current slice start position; DW2: next slice start position. */
828 slice_ver_pos << 16 |
831 next_slice_ver_pos << 16 |
/* DW3: slice QP (pic_init_qp + delta), chroma QP offsets, slice flags. */
834 (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
835 (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
836 (pic_param->pic_init_qp + slice_param->slice_qp_delta) << 6 |
837 slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag << 5 |
838 slice_param->slice_fields.bits.dependent_slice_segment_flag << 4 |
/* DW4: merge candidates, CABAC init, weighted-prediction denominators,
 * SAO and deblocking slice controls. */
843 (slice_param->max_num_merge_cand - 1) << 23 |
844 slice_param->slice_fields.bits.cabac_init_flag << 22 |
845 slice_param->luma_log2_weight_denom << 19 |
846 (slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) << 16 |
847 slice_param->slice_fields.bits.collocated_from_l0_flag << 15 |
848 (slice_type != HEVC_SLICE_B) << 14 |
849 slice_param->slice_fields.bits.mvd_l1_zero_flag << 13 |
850 slice_param->slice_fields.bits.slice_sao_luma_flag << 12 |
851 slice_param->slice_fields.bits.slice_sao_chroma_flag << 11 |
852 slice_param->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag << 10 |
853 (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
854 (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
855 slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag);
856 OUT_BCS_BATCH(batch, 0); /* DW 5 ,ignore for encoder.*/
/* PAK controls: header/data/tail insertion and bitstream conformance. */
862 1 << 10 | /* header insertion enable */
863 1 << 9 | /* slice data enable */
864 1 << 8 | /* tail insertion enable, must at end of frame, not slice */
865 1 << 2 | /* RBSP or EBSP, EmulationByteSliceInsertEnable */
866 1 << 1 | /* cabacZeroWordInsertionEnable */
867 0); /* Ignored for decoding */
868 OUT_BCS_BATCH(batch, 0); /* PAK-BSE data start offset */
/* KBL-only: two extra reserved DWORDs. */
870 if(IS_KBL(i965->intel.device_info))
872 OUT_BCS_BATCH(batch, 0);
873 OUT_BCS_BATCH(batch, 0);
876 ADVANCE_BCS_BATCH(batch);
879 /* HEVC pipe line related */
/* Picture-level programming sequence for one frame: pipe mode, surfaces,
 * buffer addresses, indirect objects, QM/FQM matrices, picture state, and
 * the per-list reference index tables. Most steps go through the
 * mfc_context vtable so hardware generations can override them. */
880 static void gen9_hcpe_hevc_pipeline_picture_programing(VADriverContextP ctx,
881 struct encode_state *encode_state,
882 struct intel_encoder_context *encoder_context)
884 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
886 mfc_context->pipe_mode_select(ctx, HCP_CODEC_HEVC, encoder_context);
887 mfc_context->set_surface_state(ctx, encode_state, encoder_context);
888 gen9_hcpe_pipe_buf_addr_state(ctx, encode_state, encoder_context);
889 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
891 mfc_context->qm_state(ctx, encoder_context);
892 mfc_context->fqm_state(ctx, encoder_context);
893 mfc_context->pic_state(ctx, encode_state, encoder_context);
894 intel_hcpe_hevc_ref_idx_state(ctx, encode_state, encoder_context);
897 static void gen9_hcpe_init(VADriverContextP ctx,
898 struct encode_state *encode_state,
899 struct intel_encoder_context *encoder_context)
902 struct i965_driver_data *i965 = i965_driver_data(ctx);
903 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
904 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
905 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
908 int slice_batchbuffer_size;
909 int slice_type = slice_param->slice_type;
910 int is_inter = (slice_type != HEVC_SLICE_I);
912 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
913 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
914 int ctb_size = 1 << log2_ctb_size;
915 int cu_size = 1 << log2_cu_size;
917 int width_in_ctb = ALIGN(pSequenceParameter->pic_width_in_luma_samples , ctb_size) / ctb_size;
918 int height_in_ctb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, ctb_size) / ctb_size;
919 int width_in_cu = ALIGN(pSequenceParameter->pic_width_in_luma_samples , cu_size) / cu_size;
920 int height_in_cu = ALIGN(pSequenceParameter->pic_height_in_luma_samples, cu_size) / cu_size;
921 int width_in_mb = ALIGN(pSequenceParameter->pic_width_in_luma_samples , 16) / 16;
922 int height_in_mb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, 16) / 16;
924 int num_cu_record = 64;
927 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
928 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
931 if (log2_ctb_size == 5) num_cu_record = 16;
932 else if (log2_ctb_size == 4) num_cu_record = 4;
933 else if (log2_ctb_size == 6) num_cu_record = 64;
935 /* frame size in samples, cu,ctu, mb */
936 mfc_context->pic_size.picture_width_in_samples = pSequenceParameter->pic_width_in_luma_samples;
937 mfc_context->pic_size.picture_height_in_samples = pSequenceParameter->pic_height_in_luma_samples;
938 mfc_context->pic_size.ctb_size = ctb_size;
939 mfc_context->pic_size.picture_width_in_ctbs = width_in_ctb;
940 mfc_context->pic_size.picture_height_in_ctbs = height_in_ctb;
941 mfc_context->pic_size.min_cb_size = cu_size;
942 mfc_context->pic_size.picture_width_in_min_cb_minus1 = width_in_cu - 1;
943 mfc_context->pic_size.picture_height_in_min_cb_minus1 = height_in_cu - 1;
944 mfc_context->pic_size.picture_width_in_mbs = width_in_mb;
945 mfc_context->pic_size.picture_height_in_mbs = height_in_mb;
947 slice_batchbuffer_size = 64 * width_in_ctb * width_in_ctb + 4096 +
948 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
950 /*Encode common setup for HCP*/
952 dri_bo_unreference(mfc_context->deblocking_filter_line_buffer.bo);
953 mfc_context->deblocking_filter_line_buffer.bo = NULL;
955 dri_bo_unreference(mfc_context->deblocking_filter_tile_line_buffer.bo);
956 mfc_context->deblocking_filter_tile_line_buffer.bo = NULL;
958 dri_bo_unreference(mfc_context->deblocking_filter_tile_column_buffer.bo);
959 mfc_context->deblocking_filter_tile_column_buffer.bo = NULL;
962 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
963 mfc_context->uncompressed_picture_source.bo = NULL;
966 dri_bo_unreference(mfc_context->metadata_line_buffer.bo);
967 mfc_context->metadata_line_buffer.bo = NULL;
969 dri_bo_unreference(mfc_context->metadata_tile_line_buffer.bo);
970 mfc_context->metadata_tile_line_buffer.bo = NULL;
972 dri_bo_unreference(mfc_context->metadata_tile_column_buffer.bo);
973 mfc_context->metadata_tile_column_buffer.bo = NULL;
976 dri_bo_unreference(mfc_context->sao_line_buffer.bo);
977 mfc_context->sao_line_buffer.bo = NULL;
979 dri_bo_unreference(mfc_context->sao_tile_line_buffer.bo);
980 mfc_context->sao_tile_line_buffer.bo = NULL;
982 dri_bo_unreference(mfc_context->sao_tile_column_buffer.bo);
983 mfc_context->sao_tile_column_buffer.bo = NULL;
985 /* mv temporal buffer */
986 for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
987 if (mfc_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
988 dri_bo_unreference(mfc_context->current_collocated_mv_temporal_buffer[i].bo);
989 mfc_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
993 for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
994 if (mfc_context->reference_surfaces[i].bo != NULL)
995 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
996 mfc_context->reference_surfaces[i].bo = NULL;
999 /* indirect data CU recording */
1000 dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
1001 mfc_context->hcp_indirect_cu_object.bo = NULL;
1003 dri_bo_unreference(mfc_context->hcp_indirect_pak_bse_object.bo);
1004 mfc_context->hcp_indirect_pak_bse_object.bo = NULL;
1006 /* Current internal buffer for HCP */
1008 size = ALIGN(pSequenceParameter->pic_width_in_luma_samples, 32) >> size_shift;
1010 ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_line_buffer), "line buffer", size);
1011 ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);
1013 size = ALIGN(pSequenceParameter->pic_height_in_luma_samples + 6 * width_in_ctb, 32) >> size_shift;
1015 ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);
1018 size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 188 + 9 * width_in_ctb + 1023) >> 9;
1020 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
1022 size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 172 + 9 * width_in_ctb + 1023) >> 9;
1024 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
1026 size = (((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4) * 176 + 89 * width_in_ctb + 1023) >> 9;
1028 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
1030 size = (pSequenceParameter->pic_width_in_luma_samples + 8 * width_in_ctb + 1023) >> 9;
1032 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
1034 size = (pSequenceParameter->pic_width_in_luma_samples + 16 * width_in_ctb + 1023) >> 9;
1036 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
1038 size = (pSequenceParameter->pic_height_in_luma_samples + 8 * height_in_ctb + 1023) >> 9;
1040 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
1043 size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 3 * width_in_ctb), 16) >> size_shift;
1045 ALLOC_ENCODER_BUFFER((&mfc_context->sao_line_buffer), "sao line buffer", size);
1047 size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 6 * width_in_ctb), 16) >> size_shift;
1049 ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_line_buffer), "sao tile line buffer", size);
1051 size = ALIGN(((pSequenceParameter->pic_height_in_luma_samples >> 1) + 6 * height_in_ctb), 16) >> size_shift;
1053 ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_column_buffer), "sao tile column buffer", size);
1055 /////////////////////
1056 dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
1057 bo = dri_bo_alloc(i965->intel.bufmgr,
1058 "Indirect data CU Buffer",
1059 width_in_ctb * height_in_ctb * num_cu_record * 16 * 4,
1062 mfc_context->hcp_indirect_cu_object.bo = bo;
1064 /* to do pak bse object buffer */
1065 /* to do current collocated mv temporal buffer */
1067 dri_bo_unreference(mfc_context->hcp_batchbuffer_surface.bo);
1068 mfc_context->hcp_batchbuffer_surface.bo = NULL;
1070 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1071 mfc_context->aux_batchbuffer_surface.bo = NULL;
1073 if (mfc_context->aux_batchbuffer)
1074 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1076 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
1077 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
1078 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1079 mfc_context->aux_batchbuffer_surface.pitch = 16;
1080 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
1081 mfc_context->aux_batchbuffer_surface.size_block = 16;
1084 static VAStatus gen9_hcpe_run(VADriverContextP ctx,
1085 struct encode_state *encode_state,
1086 struct intel_encoder_context *encoder_context)
1088 struct intel_batchbuffer *batch = encoder_context->base.batch;
1090 intel_batchbuffer_flush(batch); //run the pipeline
1092 return VA_STATUS_SUCCESS;
1097 gen9_hcpe_stop(VADriverContextP ctx,
1098 struct encode_state *encode_state,
1099 struct intel_encoder_context *encoder_context,
1100 int *encoded_bits_size)
1102 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1103 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1104 VACodedBufferSegment *coded_buffer_segment;
1106 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1107 assert(vaStatus == VA_STATUS_SUCCESS);
1108 *encoded_bits_size = coded_buffer_segment->size * 8;
1109 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1111 return VA_STATUS_SUCCESS;
/*
 * Scan a packed-header bitstream for its Annex-B start code (00 00 01 or
 * 00 00 00 01) and compute how many leading bytes (leading zeros + start
 * code + NAL unit header) the hardware should skip before applying
 * emulation-prevention byte insertion.
 *
 * buf:         packed header bytes
 * bits_length: length of the packed header payload in bits
 *
 * NOTE(review): several lines of this function are not visible in this
 * extract (the "found" bookkeeping after the start-code match, the
 * zero_byte setup, and the return statements) — read with care.
 */
1115 int intel_hevc_find_skipemulcnt(unsigned char *buf, int bits_length)
1119 int leading_zero_cnt, byte_length, zero_byte;
/* nal_unit_type occupies bits [6:1] of the first NAL header byte in HEVC */
1123 #define NAL_UNIT_TYPE_MASK 0x7e
/* hardware limit on the number of bytes that can be skipped */
1124 #define HW_MAX_SKIP_LENGTH 15
/* round the bit length up to a dword boundary, then convert to bytes */
1126 byte_length = ALIGN(bits_length, 32) >> 3;
1129 leading_zero_cnt = 0;
/* search for the 000001 / 00000001 start prefix */
1131 for (i = 0; i < byte_length - 4; i++) {
1132 if (((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)) ||
1133 ((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 0) && (buf[i + 3] == 1))) {
1140 /* warning message is complained. But anyway it will be inserted. */
1141 WARN_ONCE("Invalid packed header data. "
1142 "Can't find the 000001 start_prefix code\n");
1145 i = leading_zero_cnt;
/* 4-byte start code (00 00 00 01) has one extra zero byte to account for */
1148 if (!((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)))
1151 skip_cnt = leading_zero_cnt + zero_byte + 3;
1153 /* the unit header byte is accounted */
1154 nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK;
1156 skip_cnt += 1; /* two bytes length of nal headers in hevc */
/* presumably extension NAL types needing extra header bytes — TODO confirm
 * against the H.265 nal_unit_type table */
1158 if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
1159 /* more unit header bytes are accounted for MVC/SVC */
1162 if (skip_cnt > HW_MAX_SKIP_LENGTH) {
1163 WARN_ONCE("Too many leading zeros are padded for packed data. "
1164 "It is beyond the HW range.!!!\n");
1169 #ifdef HCP_SOFTWARE_SKYLAKE
/*
 * Emit one HCP_PAK_OBJECT command describing a single LCU/CTB to the BCS
 * batch: last-CTB flag, CU count, split flags and the LCU (x,y) position.
 * Returns the number of dwords emitted so callers can size batches.
 *
 * NOTE(review): the bodies of both IS_KBL branches are not visible in
 * this extract; on KBL the command is evidently longer (the two extra
 * zero dwords below are emitted under the second IS_KBL check), so
 * len_in_dwords is presumably bumped in the first branch — confirm
 * against the full source.
 */
1172 gen9_hcpe_hevc_pak_object(VADriverContextP ctx, int lcu_x, int lcu_y, int isLast_ctb,
1173 struct intel_encoder_context *encoder_context,
1174 int cu_count_in_lcu, unsigned int split_coding_unit_flag,
1175 struct intel_batchbuffer *batch)
1177 struct i965_driver_data *i965 = i965_driver_data(ctx);
/* base command length for SKL; KBL variant is longer (see NOTE above) */
1178 int len_in_dwords = 3;
1180 if(IS_KBL(i965->intel.device_info))
/* fall back to the encoder's own batch when none was passed in */
1184 batch = encoder_context->base.batch;
1186 BEGIN_BCS_BATCH(batch, len_in_dwords);
1188 OUT_BCS_BATCH(batch, HCP_PAK_OBJECT | (len_in_dwords - 2));
1189 OUT_BCS_BATCH(batch,
1190 (((isLast_ctb > 0) ? 1 : 0) << 31) | /* last ctb?*/
1191 ((cu_count_in_lcu - 1) << 24) | /* No motion vector */
1192 split_coding_unit_flag);
1194 OUT_BCS_BATCH(batch, (lcu_y << 16) | lcu_x); /* LCU for Y*/
1196 if(IS_KBL(i965->intel.device_info))
1198 OUT_BCS_BATCH(batch, 0);
1199 OUT_BCS_BATCH(batch, 0);
1202 ADVANCE_BCS_BATCH(batch);
1204 return len_in_dwords;
/* Layout of the VME write-back message (offsets are in dwords):
 * RDO costs, inter message start, and motion-vector start. */
1207 #define AVC_INTRA_RDO_OFFSET 4
1208 #define AVC_INTER_RDO_OFFSET 10
1209 #define AVC_INTER_MSG_OFFSET 8
1210 #define AVC_INTER_MV_OFFSET 48
1211 #define AVC_RDO_MASK 0xFFFF
/* intra MB mode field in msg[0], bits [5:4] */
1213 #define AVC_INTRA_MODE_MASK 0x30
1214 #define AVC_INTRA_16X16 0x00
1215 #define AVC_INTRA_8X8 0x01
1216 #define AVC_INTRA_4X4 0x02
/* inter MB partition field in msg[0], bits [1:0] */
1218 #define AVC_INTER_MODE_MASK 0x03
1219 #define AVC_INTER_8X8 0x03
1220 #define AVC_INTER_8X16 0x02
1221 #define AVC_INTER_16X8 0x01
1222 #define AVC_INTER_16X16 0x00
1223 #define AVC_SUBMB_SHAPE_MASK 0x00FF00
1225 /* VME output message, write back message */
1226 #define AVC_INTER_SUBMB_PRE_MODE_MASK 0x00ff0000
/* NOTE(review): duplicate of the identical #define above — harmless
 * (same value) but redundant; consider removing one copy. */
1227 #define AVC_SUBMB_SHAPE_MASK 0x00FF00
1229 /* here 1 MB = 1CU = 16x16 */
/*
 * Translate one intra macroblock's VME output into an HCP indirect CU
 * record (14 dwords) written into hcp_indirect_cu_object.  Here one
 * 16x16 MB maps to one CU; AVC intra prediction modes from the VME
 * message are remapped to HEVC angular modes via the lookup tables below.
 *
 * qp:            CU QP to encode into the record
 * msg:           VME write-back message for this MB
 * cu_index:      index of this CU within its CTB record block
 * index:         sub-block selector used to pick 8x8/4x4 mode nibbles
 *
 * NOTE(review): several declarations (cu_size, tu_size, tu_count, zero,
 * is_inter, intraMode[]) and the closing ");" of the cu_msg assignments
 * are not visible in this extract.
 */
1231 gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
1232 struct encode_state *encode_state,
1233 struct intel_encoder_context *encoder_context,
1234 int qp, unsigned int *msg,
1235 int ctb_x, int ctb_y,
1237 int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type,int cu_index,int index)
1239 /* here cu == mb, so we use mb address as the cu address */
1240 /* to fill the indirect cu by the vme out */
/* AVC intra prediction mode -> HEVC angular mode lookup tables */
1241 static int intra_mode_8x8_avc2hevc[9] = {26, 10, 1, 34, 18, 24, 13, 28, 8};
1242 static int intra_mode_16x16_avc2hevc[4] = {26, 10, 1, 34};
1243 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1244 unsigned char * cu_record_ptr = NULL;
1245 unsigned int * cu_msg = NULL;
/* each CU record is 16 dwords (64 bytes); records are grouped per CTB */
1246 int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1247 int mb_address_in_ctb = 0;
1248 int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
1251 int intraMbMode = 0;
1252 int cu_part_mode = 0;
/* NOTE(review): "inerpred_idc" is a long-standing typo for interpred_idc */
1254 int inerpred_idc = 0;
1255 int intra_chroma_mode = 5;
1259 int chroma_mode_remap[4]={5,4,3,2};
/* all-intra: every 8x8 sub-block flagged as intra (2 bits each) */
1261 if (!is_inter) inerpred_idc = 0xff;
1263 intraMbMode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1265 intra_chroma_mode = (msg[3] & 0x3);
1266 intra_chroma_mode = chroma_mode_remap[intra_chroma_mode];
1267 if (intraMbMode == AVC_INTRA_16X16) {
1268 cu_part_mode = 0; //2Nx2N
/* one 16x16 mode replicated into all four luma mode slots */
1272 intraMode[0] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1273 intraMode[1] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1274 intraMode[2] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1275 intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1276 } else if (intraMbMode == AVC_INTRA_8X8) {
1277 cu_part_mode = 0; //2Nx2N
/* msg[1] packs one 4-bit mode per 8x8 block; 'index' selects the block */
1281 intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1282 intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1283 intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1284 intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1286 } else { // for 4x4 to use 8x8 replace
1287 cu_part_mode = 3; //NxN
/* four 4-bit modes per 8x8 group; (index << 4) selects the group */
1291 intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 0) & 0xf];
1292 intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 4) & 0xf];
1293 intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 8) & 0xf];
1294 intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 12) & 0xf];
/* assumes hcp_indirect_cu_object.bo is mapped by the caller — TODO confirm */
1298 cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
1299 /* get the mb info from the vme out */
1300 cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
/* dword 0: prediction/partition/QP descriptor for the CU */
1302 cu_msg[0] = (inerpred_idc << 24 | /* interpred_idc[3:0][1:0] */
1303 zero << 23 | /* reserved */
1304 qp << 16 | /* CU_qp */
1305 zero << 11 | /* reserved */
1306 intra_chroma_mode << 8 | /* intra_chroma_mode */
1307 zero << 7 | /* IPCM_enable , reserved for SKL*/
1308 cu_part_mode << 4 | /* cu_part_mode */
1309 zero << 3 | /* cu_transquant_bypass_flag */
1310 is_inter << 2 | /* cu_pred_mode :intra 1,inter 1*/
1311 cu_size /* cu_size */
/* dword 1: four packed 6-bit luma intra modes */
1313 cu_msg[1] = (zero << 30 | /* reserved */
1314 intraMode[3] << 24 | /* intra_mode */
1315 zero << 22 | /* reserved */
1316 intraMode[2] << 16 | /* intra_mode */
1317 zero << 14 | /* reserved */
1318 intraMode[1] << 8 | /* intra_mode */
1319 zero << 6 | /* reserved */
1320 intraMode[0] /* intra_mode */
1322 /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
/* intra CU: all motion vectors and reference indices are zero */
1323 cu_msg[2] = (zero << 16 | /* mvx_l0[1] */
1324 zero /* mvx_l0[0] */
1326 cu_msg[3] = (zero << 16 | /* mvx_l0[3] */
1327 zero /* mvx_l0[2] */
1329 cu_msg[4] = (zero << 16 | /* mvy_l0[1] */
1330 zero /* mvy_l0[0] */
1332 cu_msg[5] = (zero << 16 | /* mvy_l0[3] */
1333 zero /* mvy_l0[2] */
1336 cu_msg[6] = (zero << 16 | /* mvx_l1[1] */
1337 zero /* mvx_l1[0] */
1339 cu_msg[7] = (zero << 16 | /* mvx_l1[3] */
1340 zero /* mvx_l1[2] */
1342 cu_msg[8] = (zero << 16 | /* mvy_l1[1] */
1343 zero /* mvy_l1[0] */
1345 cu_msg[9] = (zero << 16 | /* mvy_l1[3] */
1346 zero /* mvy_l1[2] */
1349 cu_msg[10] = (zero << 28 | /* ref_idx_l1[3] */
1350 zero << 24 | /* ref_idx_l1[2] */
1351 zero << 20 | /* ref_idx_l1[1] */
1352 zero << 16 | /* ref_idx_l1[0] */
1353 zero << 12 | /* ref_idx_l0[3] */
1354 zero << 8 | /* ref_idx_l0[2] */
1355 zero << 4 | /* ref_idx_l0[1] */
1356 zero /* ref_idx_l0[0] */
1359 cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
1360 cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
1361 zero << 16 | /* reserved */
1362 zero /* tu_xform_Yskip[15:0] */
1364 cu_msg[13] = (zero << 16 | /* tu_xform_Vskip[15:0] */
1365 zero /* tu_xform_Uskip[15:0] */
1371 /* here 1 MB = 1CU = 16x16 */
/*
 * Translate one inter macroblock's VME output into an HCP indirect CU
 * record.  The VME motion vectors (one pair per 4x4 sub-block) are first
 * rearranged in-place in the message into the 4-partition layout the
 * HCP CU record expects, then packed together with reference indices
 * from the VME context.
 *
 * NOTE(review): this function mutates the caller's msg buffer through
 * mv_ptr — the VME output BO contents are rewritten in place.
 * NOTE(review): several declarations (inter_mode, cu_size, tu_size,
 * tu_count, zero, is_inter, cu_part_mode assignments per branch) and
 * closing ");" lines are not visible in this extract.
 */
1373 gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
1374 struct encode_state *encode_state,
1375 struct intel_encoder_context *encoder_context,
1376 int qp, unsigned int *msg,
1377 int ctb_x, int ctb_y,
1379 int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index,int index)
1381 /* here cu == mb, so we use mb address as the cu address */
1382 /* to fill the indirect cu by the vme out */
1383 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1384 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1385 unsigned char * cu_record_ptr = NULL;
1386 unsigned int * cu_msg = NULL;
/* each CU record is 16 dwords (64 bytes); records are grouped per CTB */
1387 int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1388 int mb_address_in_ctb = 0;
1389 int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
1391 int cu_part_mode = 0;
1392 int submb_pre_mode = 0;
1399 unsigned int *mv_ptr;
1401 inter_mode = (msg[0] & AVC_INTER_MODE_MASK);
1402 submb_pre_mode = (msg[1] & AVC_INTER_SUBMB_PRE_MODE_MASK) >> 16;
1403 #define MSG_MV_OFFSET 4
1404 mv_ptr = msg + MSG_MV_OFFSET;
1405 /* MV of VME output is based on 16 sub-blocks. So it is necessary
1406 * to convert them to be compatible with the format of AVC_PAK
1409 /* 0/2/4/6/8... : l0, 1/3/5/7...: l1 ; now it only support 16x16,16x8,8x16,8x8*/
/* 16x16: one MV pair replicated into all four partition slots */
1411 if (inter_mode == AVC_INTER_16X16) {
1412 mv_ptr[4] = mv_ptr[0];
1413 mv_ptr[5] = mv_ptr[1];
1414 mv_ptr[2] = mv_ptr[0];
1415 mv_ptr[3] = mv_ptr[1];
1416 mv_ptr[6] = mv_ptr[0];
1417 mv_ptr[7] = mv_ptr[1];
/* 8x16: left MV in slots 0/2, right MV (sub-block 4 -> offset 8) in 1/3 */
1422 } else if (inter_mode == AVC_INTER_8X16) {
1423 mv_ptr[4] = mv_ptr[0];
1424 mv_ptr[5] = mv_ptr[1];
1425 mv_ptr[2] = mv_ptr[8];
1426 mv_ptr[3] = mv_ptr[9];
1427 mv_ptr[6] = mv_ptr[8];
1428 mv_ptr[7] = mv_ptr[9];
/* 16x8: top MV in 0/1, bottom MVs come from sub-block rows 16/24 */
1433 } else if (inter_mode == AVC_INTER_16X8) {
1434 mv_ptr[2] = mv_ptr[0];
1435 mv_ptr[3] = mv_ptr[1];
1436 mv_ptr[4] = mv_ptr[16];
1437 mv_ptr[5] = mv_ptr[17];
1438 mv_ptr[6] = mv_ptr[24];
1439 mv_ptr[7] = mv_ptr[25];
/* 8x8: pick the single 8x8 partition selected by 'index' and replicate */
1444 }else if(inter_mode == AVC_INTER_8X8) {
1445 mv_ptr[0] = mv_ptr[index * 8 + 0 ];
1446 mv_ptr[1] = mv_ptr[index * 8 + 1 ];
1447 mv_ptr[2] = mv_ptr[index * 8 + 0 ];
1448 mv_ptr[3] = mv_ptr[index * 8 + 1 ];
1449 mv_ptr[4] = mv_ptr[index * 8 + 0 ];
1450 mv_ptr[5] = mv_ptr[index * 8 + 1 ];
1451 mv_ptr[6] = mv_ptr[index * 8 + 0 ];
1452 mv_ptr[7] = mv_ptr[index * 8 + 1 ];
/* fallback branch: treat as 16x16 (replicate the first MV pair) */
1460 mv_ptr[4] = mv_ptr[0];
1461 mv_ptr[5] = mv_ptr[1];
1462 mv_ptr[2] = mv_ptr[0];
1463 mv_ptr[3] = mv_ptr[1];
1464 mv_ptr[6] = mv_ptr[0];
1465 mv_ptr[7] = mv_ptr[1];
/* assumes hcp_indirect_cu_object.bo is mapped by the caller — TODO confirm */
1474 cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
1475 /* get the mb info from the vme out */
1476 cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
/* dword 0: prediction/partition/QP descriptor for the CU */
1478 cu_msg[0] = (submb_pre_mode << 24 | /* interpred_idc[3:0][1:0] */
1479 zero << 23 | /* reserved */
1480 qp << 16 | /* CU_qp */
1481 zero << 11 | /* reserved */
1482 5 << 8 | /* intra_chroma_mode */
1483 zero << 7 | /* IPCM_enable , reserved for SKL*/
1484 cu_part_mode << 4 | /* cu_part_mode */
1485 zero << 3 | /* cu_transquant_bypass_flag */
1486 is_inter << 2 | /* cu_pred_mode :intra 1,inter 1*/
1487 cu_size /* cu_size */
/* dword 1: intra modes unused for an inter CU */
1489 cu_msg[1] = (zero << 30 | /* reserved */
1490 zero << 24 | /* intra_mode */
1491 zero << 22 | /* reserved */
1492 zero << 16 | /* intra_mode */
1493 zero << 14 | /* reserved */
1494 zero << 8 | /* intra_mode */
1495 zero << 6 | /* reserved */
1496 zero /* intra_mode */
1498 /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
/* each mv_ptr dword packs (y << 16 | x); split into the record's
 * separate x and y half-word arrays below */
1499 cu_msg[2] = ((mv_ptr[2] & 0xffff) << 16 | /* mvx_l0[1] */
1500 (mv_ptr[0] & 0xffff) /* mvx_l0[0] */
1502 cu_msg[3] = ((mv_ptr[6] & 0xffff) << 16 | /* mvx_l0[3] */
1503 (mv_ptr[4] & 0xffff) /* mvx_l0[2] */
1505 cu_msg[4] = ((mv_ptr[2] & 0xffff0000) | /* mvy_l0[1] */
1506 (mv_ptr[0] & 0xffff0000) >> 16 /* mvy_l0[0] */
1508 cu_msg[5] = ((mv_ptr[6] & 0xffff0000) | /* mvy_l0[3] */
1509 (mv_ptr[4] & 0xffff0000) >> 16 /* mvy_l0[2] */
1512 cu_msg[6] = ((mv_ptr[3] & 0xffff) << 16 | /* mvx_l1[1] */
1513 (mv_ptr[1] & 0xffff) /* mvx_l1[0] */
1515 cu_msg[7] = ((mv_ptr[7] & 0xffff) << 16 | /* mvx_l1[3] */
1516 (mv_ptr[5] & 0xffff) /* mvx_l1[2] */
1518 cu_msg[8] = ((mv_ptr[3] & 0xffff0000) | /* mvy_l1[1] */
1519 (mv_ptr[1] & 0xffff0000) >> 16 /* mvy_l1[0] */
1521 cu_msg[9] = ((mv_ptr[7] & 0xffff0000) | /* mvy_l1[3] */
1522 (mv_ptr[5] & 0xffff0000) >> 16 /* mvy_l1[2] */
/* dword 10: 4-bit reference indices per partition, unpacked from the
 * byte-per-partition layout kept in vme_context->ref_index_in_mb */
1525 cu_msg[10] = (((vme_context->ref_index_in_mb[1] >> 24) & 0xf) << 28 | /* ref_idx_l1[3] */
1526 ((vme_context->ref_index_in_mb[1] >> 16) & 0xf) << 24 | /* ref_idx_l1[2] */
1527 ((vme_context->ref_index_in_mb[1] >> 8) & 0xf) << 20 | /* ref_idx_l1[1] */
1528 ((vme_context->ref_index_in_mb[1] >> 0) & 0xf) << 16 | /* ref_idx_l1[0] */
1529 ((vme_context->ref_index_in_mb[0] >> 24) & 0xf) << 12 | /* ref_idx_l0[3] */
1530 ((vme_context->ref_index_in_mb[0] >> 16) & 0xf) << 8 | /* ref_idx_l0[2] */
1531 ((vme_context->ref_index_in_mb[0] >> 8) & 0xf) << 4 | /* ref_idx_l0[1] */
1532 ((vme_context->ref_index_in_mb[0] >> 0) & 0xf) /* ref_idx_l0[0] */
1535 cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
1536 cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
1537 zero << 16 | /* reserved */
1538 zero /* tu_xform_Yskip[15:0] */
1540 cu_msg[13] = (zero << 16 | /* tu_xform_Vskip[15:0] */
1541 zero /* tu_xform_Uskip[15:0] */
1547 #define HEVC_SPLIT_CU_FLAG_64_64 ((0x1<<20)|(0xf<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1548 #define HEVC_SPLIT_CU_FLAG_32_32 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1549 #define HEVC_SPLIT_CU_FLAG_16_16 ((0x0<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1550 #define HEVC_SPLIT_CU_FLAG_8_8 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
/*
 * Insert all packed header data belonging to one slice into the slice
 * batch: first every raw packed buffer except the slice header, then the
 * slice header itself (either the application-supplied packed buffer, or
 * one built by the driver when none was provided).
 *
 * NOTE(review): continuation lines of both insert_object() calls and the
 * build_hevc_slice_header() argument list are not visible in this
 * extract.
 */
1554 intel_hevc_slice_insert_packed_data(VADriverContextP ctx,
1555 struct encode_state *encode_state,
1556 struct intel_encoder_context *encoder_context,
1558 struct intel_batchbuffer *slice_batch)
1560 int count, i, start_index;
1561 unsigned int length_in_bits;
1562 VAEncPackedHeaderParameterBuffer *param = NULL;
1563 unsigned int *header_data = NULL;
1564 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1565 int slice_header_index;
/* slice_header_index == -1 means no app-provided packed slice header */
1567 if (encode_state->slice_header_index[slice_index] == 0)
1568 slice_header_index = -1;
1570 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1572 count = encode_state->slice_rawdata_count[slice_index];
1573 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* pass 1: emit all non-slice-header packed buffers for this slice */
1575 for (i = 0; i < count; i++) {
1576 unsigned int skip_emul_byte_cnt;
1578 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1580 param = (VAEncPackedHeaderParameterBuffer *)
1581 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1583 /* skip the slice header packed data type as it is lastly inserted */
1584 if (param->type == VAEncPackedHeaderSlice)
1587 length_in_bits = param->bit_length;
1589 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1591 /* as the slice header is still required, the last header flag is set to
1594 mfc_context->insert_object(ctx,
1595 ALIGN(length_in_bits, 32) >> 5,
1598 length_in_bits & 0x1f,
/* emulation prevention is added by HW only when the app did not */
1602 !param->has_emulation_bytes,
/* pass 2: emit the slice header, building one if the app gave none */
1606 if (slice_header_index == -1) {
1607 unsigned char *slice_header = NULL;
1608 int slice_header_length_in_bits = 0;
1609 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1610 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1611 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1613 /* For the Normal HEVC */
1614 slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter,
1619 mfc_context->insert_object(ctx, encoder_context,
1620 (unsigned int *)slice_header,
1621 ALIGN(slice_header_length_in_bits, 32) >> 5,
1622 slice_header_length_in_bits & 0x1f,
1623 5, /* first 6 bytes are start code + nal unit type */
1624 1, 0, 1, slice_batch);
1627 unsigned int skip_emul_byte_cnt;
1629 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1631 param = (VAEncPackedHeaderParameterBuffer *)
1632 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1633 length_in_bits = param->bit_length;
1635 /* as the slice header is the last header data for one slice,
1636 * the last header flag is set to one.
1638 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1640 mfc_context->insert_object(ctx,
1643 ALIGN(length_in_bits, 32) >> 5,
1644 length_in_bits & 0x1f,
1648 !param->has_emulation_bytes,
/*
 * Program one HEVC slice into the slice batch: emit slice state and
 * packed headers, then walk every CTB in the slice, converting the
 * per-macroblock VME output into indirect CU records (intra or inter by
 * RDO cost) and emitting one HCP_PAK_OBJECT per CTB, finally padding
 * with tail data.  One 16x16 MB corresponds to one CU.
 *
 * NOTE(review): many structural lines (braces, some declarations such as
 * qp, cu_index, cu_count, mb_addr, and argument continuation lines) are
 * not visible in this extract.
 */
1656 gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
1657 struct encode_state *encode_state,
1658 struct intel_encoder_context *encoder_context,
1660 struct intel_batchbuffer *slice_batch)
1662 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1663 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1664 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1665 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1666 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1667 int qp_slice = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1668 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1669 //unsigned char *slice_header = NULL; // for future use
1670 //int slice_header_length_in_bits = 0;
1671 unsigned int tail_data[] = { 0x0, 0x0 };
1672 int slice_type = pSliceParameter->slice_type;
/* derive CTB geometry from the sequence parameters */
1674 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1675 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1676 int ctb_size = 1 << log2_ctb_size;
1677 int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
1678 int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
1679 int last_slice = (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice) == (width_in_ctb * height_in_ctb);
1680 int ctb_width_in_mb = (ctb_size + 15) / 16;
1681 int i_ctb, ctb_x, ctb_y;
1682 unsigned int split_coding_unit_flag = 0;
1683 int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
/* padding flags for pictures not a multiple of the CTB size */
1684 int row_pad_flag = (pSequenceParameter->pic_height_in_luma_samples % ctb_size)> 0 ? 1:0;
1685 int col_pad_flag = (pSequenceParameter->pic_width_in_luma_samples % ctb_size)> 0 ? 1:0;
1687 int is_intra = (slice_type == HEVC_SLICE_I);
1688 unsigned int *msg = NULL;
1689 unsigned char *msg_ptr = NULL;
1690 int macroblock_address = 0;
1691 int num_cu_record = 64;
1693 int tmp_mb_mode = 0;
1694 int mb_x = 0, mb_y = 0;
1697 int inter_rdo, intra_rdo;
1699 int drop_cu_row_in_last_mb = 0;
1700 int drop_cu_column_in_last_mb = 0;
/* CU-record slots per CTB: 64 for 64x64, 16 for 32x32, 4 for 16x16 */
1702 if (log2_ctb_size == 5) num_cu_record = 16;
1703 else if (log2_ctb_size == 4) num_cu_record = 4;
1704 else if (log2_ctb_size == 6) num_cu_record = 64;
/* CBR: take the QP from the bit-rate controller; B slices reuse the
 * P-slice QP in the cases below */
1707 if (rate_control_mode == VA_RC_CBR) {
1708 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1709 if(slice_type == HEVC_SLICE_B) {
1710 if(pSequenceParameter->ip_period == 1)
1712 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1714 }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
1715 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
/* only rewrite the app's slice_qp_delta when the driver builds the
 * slice header itself */
1718 if (encode_state->slice_header_index[slice_index] == 0) {
1719 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1723 /* only support for 8-bit pixel bit-depth */
1724 assert(pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 <= 2);
1725 assert(pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 <= 2);
1726 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1727 assert(qp >= 0 && qp < 52);
1730 gen9_hcpe_hevc_slice_state(ctx,
1733 encode_state, encoder_context,
/* picture-level headers go in front of the first slice only */
1736 if (slice_index == 0)
1737 intel_hcpe_hevc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1739 intel_hevc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1742 slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header, slice_index);
1743 int skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)slice_header, slice_header_length_in_bits);
1745 mfc_context->insert_object(ctx, encoder_context,
1746 (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1748 1, 0, 1, slice_batch);
/* default split flags for a full (unpadded) CTB */
1755 split_coding_unit_flag = (ctb_width_in_mb == 4) ? HEVC_SPLIT_CU_FLAG_64_64 : ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
/* map VME output (read) and the indirect CU object (write) */
1757 dri_bo_map(vme_context->vme_output.bo , 1);
1758 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1759 dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
/* main loop: one iteration per CTB of the slice */
1761 for (i_ctb = pSliceParameter->slice_segment_address;i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
1762 int last_ctb = (i_ctb == (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice - 1));
1763 int ctb_height_in_mb_internal = ctb_width_in_mb;
1764 int ctb_width_in_mb_internal = ctb_width_in_mb;
1765 int max_cu_num_in_mb = 4;
1767 ctb_x = i_ctb % width_in_ctb;
1768 ctb_y = i_ctb / width_in_ctb;
1770 drop_cu_row_in_last_mb = 0;
1771 drop_cu_column_in_last_mb = 0;
/* shrink the last CTB row/column to what the picture actually covers,
 * and drop 8x8 CUs that fall outside a partial last MB */
1773 if(ctb_y == (height_in_ctb - 1) && row_pad_flag)
1775 ctb_height_in_mb_internal = (pSequenceParameter->pic_height_in_luma_samples - (ctb_y * ctb_size) + 15)/16;
1777 if((log2_cu_size == 3) && (pSequenceParameter->pic_height_in_luma_samples % 16))
1778 drop_cu_row_in_last_mb = (16 - (pSequenceParameter->pic_height_in_luma_samples % 16))>>log2_cu_size;
1781 if(ctb_x == (width_in_ctb - 1) && col_pad_flag)
1783 ctb_width_in_mb_internal = (pSequenceParameter->pic_width_in_luma_samples - (ctb_x * ctb_size) + 15) / 16;
1785 if((log2_cu_size == 3) && (pSequenceParameter->pic_width_in_luma_samples % 16))
1786 drop_cu_column_in_last_mb = (16 - (pSequenceParameter->pic_width_in_luma_samples % 16))>>log2_cu_size;
1791 macroblock_address = ctb_y * width_in_mbs * ctb_width_in_mb + ctb_x * ctb_width_in_mb;
1792 split_coding_unit_flag = ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
/* iterate the 16x16 MBs inside this CTB */
1797 for (mb_y = 0; mb_y < ctb_height_in_mb_internal; mb_y++)
1799 mb_addr = macroblock_address + mb_y * width_in_mbs ;
1800 for (mb_x = 0; mb_x < ctb_width_in_mb_internal; mb_x++)
1802 max_cu_num_in_mb = 4;
1803 if(drop_cu_row_in_last_mb && (mb_y == ctb_height_in_mb_internal - 1))
1804 max_cu_num_in_mb /= 2;
1806 if(drop_cu_column_in_last_mb && (mb_x == ctb_width_in_mb_internal - 1))
1807 max_cu_num_in_mb /= 2;
1809 /* get the mb info from the vme out */
1810 msg = (unsigned int *)(msg_ptr + mb_addr * vme_context->vme_output.size_block);
/* pick intra vs inter by the cheaper RDO cost */
1812 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1813 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1814 /*fill to indirect cu */
1816 if (is_intra || intra_rdo < inter_rdo) {
1818 tmp_mb_mode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1819 if(max_cu_num_in_mb < 4){
1820 if(tmp_mb_mode == AVC_INTRA_16X16)
/* NOTE(review): BUG — '!' is logical NOT, so (msg[0] & !AVC_INTRA_MODE_MASK)
 * evaluates to 0 and every other bit of msg[0] is discarded, not just the
 * mode field; '~AVC_INTRA_MODE_MASK' was almost certainly intended. */
1822 msg[0] = (msg[0] & !AVC_INTRA_MODE_MASK) | (AVC_INTRA_8X8<<4);
1823 tmp_mb_mode = AVC_INTRA_8X8;
1826 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1827 if(--max_cu_num_in_mb > 0)
1828 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1830 if(ctb_width_in_mb == 2)
1831 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1832 else if(ctb_width_in_mb == 1)
1833 split_coding_unit_flag |= 0x1 << 20;
1835 else if(tmp_mb_mode == AVC_INTRA_16X16) {
1836 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1837 } else { // for 4x4 to use 8x8 replace
1838 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1839 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1840 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1841 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
1842 if(ctb_width_in_mb == 2)
1843 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1844 else if(ctb_width_in_mb == 1)
1845 split_coding_unit_flag |= 0x1 << 20;
/* inter path: advance to the inter portion of the VME message */
1848 msg += AVC_INTER_MSG_OFFSET;
1850 tmp_mb_mode = msg[0] & AVC_INTER_MODE_MASK;
1851 if(max_cu_num_in_mb < 4)
1853 if(tmp_mb_mode != AVC_INTER_8X8)
/* NOTE(review): BUG — same '!' vs '~' mistake as above: the logical NOT
 * zeroes msg[0] entirely instead of clearing only the mode bits. */
1855 msg[0] = (msg[0] & !AVC_INTER_MODE_MASK) | AVC_INTER_8X8;
1856 tmp_mb_mode = AVC_INTER_8X8;
1858 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1859 if(--max_cu_num_in_mb > 0)
1860 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1862 if(ctb_width_in_mb == 2)
1863 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1864 else if(ctb_width_in_mb == 1)
1865 split_coding_unit_flag |= 0x1 << 20;
1867 else if (tmp_mb_mode == AVC_INTER_8X8){
1868 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1869 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1870 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1871 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
1872 if(ctb_width_in_mb == 2)
1873 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1874 else if(ctb_width_in_mb == 1)
1875 split_coding_unit_flag |= 0x1 << 20;
1877 }else if(tmp_mb_mode == AVC_INTER_16X16 ||
1878 tmp_mb_mode == AVC_INTER_8X16 ||
1879 tmp_mb_mode == AVC_INTER_16X8) {
1880 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1887 cu_count = cu_index;
1888 // PAK object fill accordingly.
1889 gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, encoder_context, cu_count, split_coding_unit_flag, slice_batch);
1892 dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
1893 dri_bo_unmap(vme_context->vme_output.bo);
/* tail padding: two dwords on the last slice, one otherwise */
1896 mfc_context->insert_object(ctx, encoder_context,
1898 2, 1, 1, 0, slice_batch);
1900 mfc_context->insert_object(ctx, encoder_context,
1902 1, 1, 1, 0, slice_batch);
/*
 * Build the per-slice HCP commands into the auxiliary (software) batch buffer
 * and hand its backing bo to the caller.  The aux batchbuffer itself is freed
 * here; only the referenced bo survives, to be chained from the main batch
 * via MI_BATCH_BUFFER_START by the caller.
 */
gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context)
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch;
    batch = mfc_context->aux_batchbuffer;
    batch_bo = batch->buffer;
    /* Program every slice of the picture into the aux batch. */
    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        gen9_hcpe_hevc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
    /* QWORD-align, then terminate the secondary batch. */
    intel_batchbuffer_align(batch, 8);
    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);
    /* Keep the bo alive past intel_batchbuffer_free(); caller unreferences it. */
    dri_bo_reference(batch_bo);
    intel_batchbuffer_free(batch);
    mfc_context->aux_batchbuffer = NULL;
/*
 * Top-level BCS programming for one HEVC frame: build the slice-level batch
 * (software or hardware variant), emit picture-level state, then chain to the
 * slice batch with MI_BATCH_BUFFER_START.
 */
gen9_hcpe_hevc_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;
#ifdef HCP_SOFTWARE_SKYLAKE
    slice_batch_bo = gen9_hcpe_hevc_software_batchbuffer(ctx, encode_state, encoder_context);
    slice_batch_bo = gen9_hcpe_hevc_hardware_batchbuffer(ctx, encode_state, encoder_context);
    /* Choose the BCS ring: override to BSD ring 0 when a second BSD exists. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x4000, BSD_RING0);
        intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);
    // picture level programming (pipe mode select, surfaces, pic state, QM/FQM)
    gen9_hcpe_hevc_pipeline_picture_programing(ctx, encode_state, encoder_context);
    /* Chain to the slice-level secondary batch. */
    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_COMMAND, 0,
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
    /* Drop the reference taken when the slice batch bo was handed over. */
    dri_bo_unreference(slice_batch_bo);
/*
 * Insert the application-supplied packed headers (VPS, SPS, PPS, SEI) into
 * the slice batch via the context's insert_object hook.  Each section follows
 * the same pattern: look up the packed-header slot, take the raw bits and bit
 * length from the parameter buffer, compute how many leading bytes to exclude
 * from emulation-prevention processing, and emit.
 */
void intel_hcpe_hevc_pipeline_header_programing(VADriverContextP ctx,
        struct encode_state *encode_state,
        struct intel_encoder_context *encoder_context,
        struct intel_batchbuffer *slice_batch)
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS);
    unsigned int skip_emul_byte_cnt;
    /* --- VPS --- */
    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;
        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;
        skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
        mfc_context->insert_object(ctx,
                                   ALIGN(length_in_bits, 32) >> 5,  /* length in DWORDs */
                                   length_in_bits & 0x1f,           /* bits used in last DWORD */
                                   !param->has_emulation_bytes,     /* let HW add emulation bytes if absent */
    /* --- SPS (stored in the slot right after VPS) --- */
    idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS) + 1; // index to SPS
    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;
        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;
        skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
        mfc_context->insert_object(ctx,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   !param->has_emulation_bytes,
    /* --- PPS --- */
    idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_PPS);
    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;
        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;
        skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
        mfc_context->insert_object(ctx,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   !param->has_emulation_bytes,
    /* --- SEI --- */
    idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_SEI);
    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;
        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;
        skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
        mfc_context->insert_object(ctx,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   !param->has_emulation_bytes,
/*
 * Bind all input/output objects for the frame into the HCP context:
 * reconstructed surface + its collocated-MV temporal buffer, reference
 * surfaces, the raw input YUV, and the coded buffer that receives the
 * PAK bitstream (BSE object).  Takes a dri_bo reference on everything it
 * keeps; the references are dropped in gen9_hcpe_context_destroy().
 */
VAStatus intel_hcpe_hevc_prepare(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    GenHevcSurface *hevc_encoder_surface;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    /*Setup all the input&output object*/
    /* Setup current frame and current direct mv buffer*/
    obj_surface = encode_state->reconstructed_object;
    hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
    assert(hevc_encoder_surface);
    if (hevc_encoder_surface) {
        hevc_encoder_surface->has_p010_to_nv12_done=0;
        hevc_encoder_surface->base.frame_store_id = -1;
        /* Last slot is reserved for the current frame's MV temporal buffer. */
        mfc_context->current_collocated_mv_temporal_buffer[NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS - 1].bo = hevc_encoder_surface->motion_vector_temporal_bo;
        dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
    /* Surface-state geometry: orig_* is the coded size, width/height the pitch. */
    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;
    /* Setup reference frames and direct mv buffers*/
    for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
        obj_surface = encode_state->reference_objects[i];
        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(obj_surface->bo);
            /* Check MV temporal buffer */
            hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
            assert(hevc_encoder_surface);
            if (hevc_encoder_surface) {
                hevc_encoder_surface->base.frame_store_id = -1;
                /* Setup MV temporal buffer */
                mfc_context->current_collocated_mv_temporal_buffer[i].bo = hevc_encoder_surface->motion_vector_temporal_bo;
                dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
    /* Raw input picture. */
    mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
    /* Coded buffer: bitstream starts after the driver's header segment. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->hcp_indirect_pak_bse_object.bo = bo;
    mfc_context->hcp_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->hcp_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->hcp_indirect_pak_bse_object.bo);
    /* Reset the coded-buffer bookkeeping segment at the head of the bo. */
    coded_buffer_segment = (struct i965_coded_buffer_segment *)(bo->virtual);
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
2162 /* HEVC BRC related */
/*
 * Seed per-slice-type bit-rate-control state from the sequence parameters:
 * derive per-CTB ("mb") bit budgets from bitrate/fps, then set the default
 * QP and the grow/shrink/correct tuning tables used by the HW BRC.
 */
intel_hcpe_bit_rate_control_context_init(struct encode_state *encode_state,
                                         struct gen9_hcpe_context *mfc_context)
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    /* "mbs" here are CTB-sized units, rounded up to cover the picture. */
    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
    int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
    /* NOTE(review): divides by vui_num_units_in_tick without a zero check —
     * assumes the app always supplies valid VUI timing; confirm upstream. */
    float fps = pSequenceParameter->vui_time_scale / pSequenceParameter->vui_num_units_in_tick ;
    double bitrate = pSequenceParameter->bits_per_second * 1.0;
    /* Average bits per CTB for inter frames; intra budget is 5x that. */
    int inter_mb_size = bitrate * 1.0 / (fps + 4.0) / width_in_mbs / height_in_mbs;
    int intra_mb_size = inter_mb_size * 5.0;
    mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_mb_size = intra_mb_size;
    mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
    mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_mb_size = inter_mb_size;
    mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
    mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_mb_size = inter_mb_size;
    mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
    /* Common defaults for I/P/B (indices 0..2). */
    for (i = 0 ; i < 3; i++) {
        mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
        mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
        mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
        mfc_context->bit_rate_control_context[i].GrowInit = 6;
        mfc_context->bit_rate_control_context[i].GrowResistance = 4;
        mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
        mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
        /* Correction-factor table, symmetric around the target. */
        mfc_context->bit_rate_control_context[i].Correct[0] = 8;
        mfc_context->bit_rate_control_context[i].Correct[1] = 4;
        mfc_context->bit_rate_control_context[i].Correct[2] = 2;
        mfc_context->bit_rate_control_context[i].Correct[3] = 2;
        mfc_context->bit_rate_control_context[i].Correct[4] = 4;
        mfc_context->bit_rate_control_context[i].Correct[5] = 8;
    /* Size targets in 16-byte words; max is 1.5x the target. */
    mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord = (intra_mb_size + 16) / 16;
    mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord = (inter_mb_size + 16) / 16;
    mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord = (inter_mb_size + 16) / 16;
    mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord * 1.5;
    mfc_context->bit_rate_control_context[HEVC_SLICE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord * 1.5;
    mfc_context->bit_rate_control_context[HEVC_SLICE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord * 1.5;
/*
 * Initialize the software BRC model: per-slice-type frame-size targets from
 * the GOP structure, the HRD buffer size/fullness (from the HRD misc param
 * when supplied, else derived from the bitrate), and an initial QP estimated
 * by interpolating bits-per-frame between QP1-sized and QP51-sized frames.
 */
static void intel_hcpe_brc_init(struct encode_state *encode_state,
                                struct intel_encoder_context* encoder_context)
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncMiscParameterHRD* pParameterHRD = NULL;
    VAEncMiscParameterBuffer* pMiscParamHRD = NULL;
    double bitrate = pSequenceParameter->bits_per_second * 1.0;
    double framerate = (double)pSequenceParameter->vui_time_scale / (double)pSequenceParameter->vui_num_units_in_tick;
    int inum = 1, pnum = 0, bnum = 0; /* GOP structure: number of I, P, B frames in the GOP. */
    int intra_period = pSequenceParameter->intra_period;
    int ip_period = pSequenceParameter->ip_period;
    /* Rough frame sizes at QP=1 and QP=51 (bits), used to bracket the QP guess. */
    double qp1_size = 0.1 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
    double qp51_size = 0.001 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
    double buffer_size = 0;
    /* Scale the size brackets for >8-bit content. */
    if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) ||
       (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
    qp1_size = qp1_size * bpp;
    qp51_size = qp51_size * bpp;
    /* HRD misc parameter is optional. */
    if (!encode_state->misc_param[VAEncMiscParameterTypeHRD][0] || !encode_state->misc_param[VAEncMiscParameterTypeHRD][0]->buffer)
    pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD][0]->buffer;
    pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
    /* Derive the P/B frame counts of one GOP from intra_period and ip_period. */
    if (pSequenceParameter->ip_period) {
        pnum = (intra_period + ip_period - 1) / ip_period - 1;
        bnum = intra_period - inum - pnum;
    mfc_context->brc.mode = encoder_context->rate_control_mode;
    /* Split the GOP bit budget across I/P/B using the BRC_*WEIGHT factors. */
    mfc_context->brc.target_frame_size[HEVC_SLICE_I] = (int)((double)((bitrate * intra_period) / framerate) /
                                                             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
    mfc_context->brc.target_frame_size[HEVC_SLICE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
    mfc_context->brc.target_frame_size[HEVC_SLICE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
    mfc_context->brc.gop_nums[HEVC_SLICE_I] = inum;
    mfc_context->brc.gop_nums[HEVC_SLICE_P] = pnum;
    mfc_context->brc.gop_nums[HEVC_SLICE_B] = bnum;
    bpf = mfc_context->brc.bits_per_frame = bitrate / framerate;
    /* No usable HRD parameter: derive buffer size from bitrate, start half full. */
    if (!pParameterHRD || pParameterHRD->buffer_size <= 0)
    mfc_context->hrd.buffer_size = bitrate * ratio;
    mfc_context->hrd.current_buffer_fullness =
        (double)(bitrate * ratio/2 < mfc_context->hrd.buffer_size) ?
        bitrate * ratio/2 : mfc_context->hrd.buffer_size / 2.;
    /* App-supplied HRD: clamp buffer size into [ratio_min, ratio_max] x bitrate. */
    buffer_size = (double)pParameterHRD->buffer_size ;
    if(buffer_size < bitrate * ratio_min)
    {
        buffer_size = bitrate * ratio_min;
    }else if (buffer_size > bitrate * ratio_max)
    {
        buffer_size = bitrate * ratio_max ;
    mfc_context->hrd.buffer_size =buffer_size;
    if(pParameterHRD->initial_buffer_fullness > 0)
    {
        mfc_context->hrd.current_buffer_fullness =
            (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
            pParameterHRD->initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
    mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size / 2.;
    mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size / 2.;
    mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size / qp1_size;
    mfc_context->hrd.violation_noted = 0;
    /* Initial QP: linear interpolation of bits-per-frame between the brackets. */
    if ((bpf > qp51_size) && (bpf < qp1_size)) {
        mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
    } else if (bpf >= qp1_size)
        mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 1;
    else if (bpf <= qp51_size)
        mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51;
    mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
    mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
    /* Cap the starting QPs so the first frames are not over-quantized. */
    BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 36);
    BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 40);
    BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 45);
/*
 * Update the HRD buffer model with the bits produced by the last frame.
 * Returns BRC_UNDERFLOW / BRC_OVERFLOW (leaving fullness rolled back to its
 * previous value so the frame can be re-evaluated) or BRC_NO_HRD_VIOLATION.
 */
int intel_hcpe_update_hrd(struct encode_state *encode_state,
                          struct gen9_hcpe_context *mfc_context,
    double prev_bf = mfc_context->hrd.current_buffer_fullness;
    /* Drain the frame's bits from the buffer... */
    mfc_context->hrd.current_buffer_fullness -= frame_bits;
    if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
        /* Frame too large: restore fullness and report underflow. */
        mfc_context->hrd.current_buffer_fullness = prev_bf;
        return BRC_UNDERFLOW;
    /* ...then refill at the channel rate. */
    mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
    if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
        /* VBR tolerates overflow by saturating; otherwise roll back and report. */
        if (mfc_context->brc.mode == VA_RC_VBR)
            mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
            mfc_context->hrd.current_buffer_fullness = prev_bf;
            return BRC_OVERFLOW;
    return BRC_NO_HRD_VIOLATION;
/*
 * Post-encode BRC step: given the actual bit count of the just-encoded frame,
 * predict the QP for the next frame of the same slice type, apply an HRD
 * fullness correction, keep the I/P/B QPs mutually consistent, and handle
 * HRD underflow/overflow by forcing the QP up/down.  Returns the (possibly
 * escalated) gen6_brc_status from the HRD check.
 */
int intel_hcpe_brc_postpack(struct encode_state *encode_state,
                            struct gen9_hcpe_context *mfc_context,
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    int slicetype = pSliceParameter->slice_type;
    int qpi = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
    int qpp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
    int qpb = mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* HRD-buffer coordinates:
     * x - how far we are from HRD buffer borders
     * y - how far we are from target HRD buffer fullness
     */
    double frame_size_alpha;
    /* B slices are budgeted as P when the GOP effectively has no B frames. */
    if(slicetype == HEVC_SLICE_B) {
        if(pSequenceParameter->ip_period == 1)
            slicetype = HEVC_SLICE_P;
        }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
            slicetype = HEVC_SLICE_P;
    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
    /* Smoothing factor: more frames of this type in the GOP -> slower reaction. */
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
                      (double)(frame_size_alpha + 1.);
    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);
    /* QP scales roughly inversely with achieved frame size. */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);
    /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
    mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
    if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
        mfc_context->brc.qpf_rounding_accumulator = 0.;
    } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
        mfc_context->brc.qpf_rounding_accumulator = 0.;
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);
    /* checking whether HRD compliance is still met */
    sts = intel_hcpe_update_hrd(encode_state, mfc_context, frame_bits);
    /* calculating QP delta as some function of buffer-fullness error */
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    x /= mfc_context->hrd.target_buffer_fullness;
    y = mfc_context->hrd.current_buffer_fullness;
    x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
    y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    if (y < 0.01) y = 0.01;
    else if (x < -1) x = -1;
    /* Smooth correction: damped by distance to the border, sinusoidal in x. */
    delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);
    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (slicetype == HEVC_SLICE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == HEVC_SLICE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // HEVC_SLICE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        if (qpn <= qp) qpn = qp + 1;  /* must quantize harder next time */
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
    } else if (sts == BRC_OVERFLOW) {
        if (qpn >= qp) qpn = qp - 1;  /* must spend more bits next time */
        if (qpn < 1) { // < 0 (?) overflow with minQP
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
/*
 * Initialize the VUI HRD bookkeeping used for CBR timing SEI: bitrate/CPB
 * size in 1024-unit scale, initial CPB removal delay in 90 kHz ticks, and
 * the bit lengths of the delay syntax elements.
 */
static void intel_hcpe_hrd_context_init(struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context)
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int target_bit_rate = pSequenceParameter->bits_per_second;
    // currently we only support CBR mode.
    if (rate_control_mode == VA_RC_CBR) {
        mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
        /* CPB sized at 8x the per-1024 bitrate units. */
        mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
        /* Start half full, expressed in 90 kHz clock ticks. */
        mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
        mfc_context->vui_hrd.i_cpb_removal_delay = 2;
        mfc_context->vui_hrd.i_frame_number = 0;
        mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/* Advance the HRD frame counter after a successfully rate-controlled frame. */
intel_hcpe_hrd_context_update(struct encode_state *encode_state,
                              struct gen9_hcpe_context *mfc_context)
    mfc_context->vui_hrd.i_frame_number++;
/*
 * Sanity-check that the supplied slices exactly cover the picture: sum the
 * CTU counts of all slices and compare with width_in_ctb * height_in_ctb.
 */
int intel_hcpe_interlace_check(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
    VAEncSliceParameterBufferHEVC *pSliceParameter;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    /* Round up so partial CTBs at the right/bottom edges are counted. */
    int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
    int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
        ctbCount += pSliceParameter->num_ctu_in_slice;
    if (ctbCount == (width_in_ctb * height_in_ctb))
 * Check whether the CBR-related parameters have been updated and decide
 * whether the CBR configuration needs to be reinitialized.
 * Currently the following parameters are checked:
 * gop_configuration(intra_period, ip_period, intra_idr_period)
2530 static bool intel_hcpe_brc_updated_check(struct encode_state *encode_state,
2531 struct intel_encoder_context *encoder_context)
2534 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2535 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2536 double cur_fps, cur_bitrate;
2537 VAEncSequenceParameterBufferHEVC *pSequenceParameter;
2540 if (rate_control_mode != VA_RC_CBR) {
2544 pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2546 cur_bitrate = pSequenceParameter->bits_per_second;
2547 cur_fps = (double)pSequenceParameter->vui_time_scale /
2548 (double)pSequenceParameter->vui_num_units_in_tick;
2550 if ((cur_bitrate == mfc_context->brc.saved_bps) &&
2551 (cur_fps == mfc_context->brc.saved_fps) &&
2552 (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
2553 (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
2554 (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) {
2555 /* the parameters related with CBR are not updaetd */
2559 mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
2560 mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
2561 mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
2562 mfc_context->brc.saved_fps = cur_fps;
2563 mfc_context->brc.saved_bps = cur_bitrate;
/*
 * Pre-frame BRC hook: under CBR, (re)initialize the bit-rate-control and
 * HRD contexts on first use or whenever the CBR parameters changed.
 */
void intel_hcpe_brc_prepare(struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    if (rate_control_mode == VA_RC_CBR) {
        assert(encoder_context->codec != CODEC_MPEG2);
        brc_updated = intel_hcpe_brc_updated_check(encode_state, encoder_context);
        /* Programming bit rate control: MaxSizeInWord == 0 means "never initialized". */
        if ((mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord == 0) ||
            intel_hcpe_bit_rate_control_context_init(encode_state, mfc_context);
            intel_hcpe_brc_init(encode_state, encoder_context);
        /* Programming HRD control: i_cpb_size_value == 0 means "never initialized". */
        if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated)
            intel_hcpe_hrd_context_init(encode_state, encoder_context);
2592 /* HEVC interface API for encoder */
2595 gen9_hcpe_hevc_encode_picture(VADriverContextP ctx,
2596 struct encode_state *encode_state,
2597 struct intel_encoder_context *encoder_context)
2599 struct gen9_hcpe_context *hcpe_context = encoder_context->mfc_context;
2600 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2601 int current_frame_bits_size;
2605 gen9_hcpe_init(ctx, encode_state, encoder_context);
2606 intel_hcpe_hevc_prepare(ctx, encode_state, encoder_context);
2607 /*Programing bcs pipeline*/
2608 gen9_hcpe_hevc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
2609 gen9_hcpe_run(ctx, encode_state, encoder_context);
2610 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
2611 gen9_hcpe_stop(ctx, encode_state, encoder_context, ¤t_frame_bits_size);
2612 sts = intel_hcpe_brc_postpack(encode_state, hcpe_context, current_frame_bits_size);
2613 if (sts == BRC_NO_HRD_VIOLATION) {
2614 intel_hcpe_hrd_context_update(encode_state, hcpe_context);
2616 } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
2617 if (!hcpe_context->hrd.violation_noted) {
2618 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
2619 hcpe_context->hrd.violation_noted = 1;
2621 return VA_STATUS_SUCCESS;
2628 return VA_STATUS_SUCCESS;
/*
 * Release every dri_bo the HCP encoder context holds (deblocking/metadata/
 * SAO line buffers, MV temporal buffers, references, indirect objects,
 * batch-buffer surfaces) and free the aux batchbuffer.  Pointers are NULLed
 * after unreference so a repeated destroy is harmless.
 */
gen9_hcpe_context_destroy(void *context)
    struct gen9_hcpe_context *hcpe_context = context;
    dri_bo_unreference(hcpe_context->deblocking_filter_line_buffer.bo);
    hcpe_context->deblocking_filter_line_buffer.bo = NULL;
    dri_bo_unreference(hcpe_context->deblocking_filter_tile_line_buffer.bo);
    hcpe_context->deblocking_filter_tile_line_buffer.bo = NULL;
    dri_bo_unreference(hcpe_context->deblocking_filter_tile_column_buffer.bo);
    hcpe_context->deblocking_filter_tile_column_buffer.bo = NULL;
    dri_bo_unreference(hcpe_context->uncompressed_picture_source.bo);
    hcpe_context->uncompressed_picture_source.bo = NULL;
    dri_bo_unreference(hcpe_context->metadata_line_buffer.bo);
    hcpe_context->metadata_line_buffer.bo = NULL;
    dri_bo_unreference(hcpe_context->metadata_tile_line_buffer.bo);
    hcpe_context->metadata_tile_line_buffer.bo = NULL;
    dri_bo_unreference(hcpe_context->metadata_tile_column_buffer.bo);
    hcpe_context->metadata_tile_column_buffer.bo = NULL;
    dri_bo_unreference(hcpe_context->sao_line_buffer.bo);
    hcpe_context->sao_line_buffer.bo = NULL;
    dri_bo_unreference(hcpe_context->sao_tile_line_buffer.bo);
    hcpe_context->sao_tile_line_buffer.bo = NULL;
    dri_bo_unreference(hcpe_context->sao_tile_column_buffer.bo);
    hcpe_context->sao_tile_column_buffer.bo = NULL;
    /* mv temporal buffer */
    for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
        /* NOTE(review): the NULL guard looks redundant — elsewhere in this file
         * dri_bo_unreference is called without one; confirm libdrm tolerates NULL. */
        if (hcpe_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
            dri_bo_unreference(hcpe_context->current_collocated_mv_temporal_buffer[i].bo);
        hcpe_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
    for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
        dri_bo_unreference(hcpe_context->reference_surfaces[i].bo);
        hcpe_context->reference_surfaces[i].bo = NULL;
    dri_bo_unreference(hcpe_context->hcp_indirect_cu_object.bo);
    hcpe_context->hcp_indirect_cu_object.bo = NULL;
    dri_bo_unreference(hcpe_context->hcp_indirect_pak_bse_object.bo);
    hcpe_context->hcp_indirect_pak_bse_object.bo = NULL;
    dri_bo_unreference(hcpe_context->hcp_batchbuffer_surface.bo);
    hcpe_context->hcp_batchbuffer_surface.bo = NULL;
    dri_bo_unreference(hcpe_context->aux_batchbuffer_surface.bo);
    hcpe_context->aux_batchbuffer_surface.bo = NULL;
    if (hcpe_context->aux_batchbuffer)
        intel_batchbuffer_free(hcpe_context->aux_batchbuffer);
    hcpe_context->aux_batchbuffer = NULL;
/*
 * Encoder pipeline entry point: dispatch on the VA profile.  Only HEVC Main
 * and Main10 are handled; anything else yields UNSUPPORTED_PROFILE.
 */
VAStatus gen9_hcpe_pipeline(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
    case VAProfileHEVCMain:
    case VAProfileHEVCMain10:
        vaStatus = gen9_hcpe_hevc_encode_picture(ctx, encode_state, encoder_context);
        vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2720 Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2722 struct gen9_hcpe_context *hcpe_context = calloc(1, sizeof(struct gen9_hcpe_context));
2724 assert(hcpe_context);
2725 hcpe_context->pipe_mode_select = gen9_hcpe_pipe_mode_select;
2726 hcpe_context->set_surface_state = gen9_hcpe_surface_state;
2727 hcpe_context->ind_obj_base_addr_state = gen9_hcpe_ind_obj_base_addr_state;
2728 hcpe_context->pic_state = gen9_hcpe_hevc_pic_state;
2729 hcpe_context->qm_state = gen9_hcpe_hevc_qm_state;
2730 hcpe_context->fqm_state = gen9_hcpe_hevc_fqm_state;
2731 hcpe_context->insert_object = gen9_hcpe_hevc_insert_object;
2732 hcpe_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2734 encoder_context->mfc_context = hcpe_context;
2735 encoder_context->mfc_context_destroy = gen9_hcpe_context_destroy;
2736 encoder_context->mfc_pipeline = gen9_hcpe_pipeline;
2737 encoder_context->mfc_brc_prepare = intel_hcpe_brc_prepare;
2739 hevc_gen_default_iq_matrix_encoder(&hcpe_context->iq_matrix_hevc);