2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Qu Pengfei <Pengfei.Qu@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
43 #include "intel_media.h"
/*
 * BRC (bit-rate control) HRD-conformance status codes returned by the
 * software rate controller after sizing a frame.
 * NOTE(review): intermediate enumerators (values 1..2, presumably the
 * non-clamped underflow/overflow cases) are not visible in this view.
 */
45 typedef enum _gen6_brc_status {
46 BRC_NO_HRD_VIOLATION = 0,
49 BRC_UNDERFLOW_WITH_MAX_QP = 3,
50 BRC_OVERFLOW_WITH_MIN_QP = 4,
/* Clamp x into [min, max]; note x is evaluated multiple times, so callers
 * must pass a plain lvalue with no side effects. */
54 #define BRC_CLIP(x, min, max) \
56 x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \
/* Fixed QP offsets between slice types used by the BRC heuristics. */
59 #define BRC_P_B_QP_DIFF 4
60 #define BRC_I_P_QP_DIFF 2
61 #define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
63 #define BRC_PWEIGHT 0.6 /* weight if P slice with comparison to I slice */
64 #define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */
66 #define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
67 #define BRC_CY 0.1 /* weight for */
/* Tuning constants for the under/overflow correction curve (see BRC math
 * elsewhere in this file; exact use not visible in this view). */
68 #define BRC_CX_UNDERFLOW 5.
69 #define BRC_CX_OVERFLOW -4.
/* pi/2 to high precision, used by the arctangent-style BRC correction. */
71 #define BRC_PI_0_5 1.5707963267948966192313216916398
73 /* intel buffer write */
/* (Re)allocate a GEM buffer object for an encoder-internal buffer:
 * drops any previous bo reference, allocates "size" bytes from the
 * driver's bufmgr, and asserts success. Requires a local "i965" in scope. */
74 #define ALLOC_ENCODER_BUFFER(gen_buffer, string, size) do { \
75 dri_bo_unreference(gen_buffer->bo); \
76 gen_buffer->bo = dri_bo_alloc(i965->intel.bufmgr, \
80 assert(gen_buffer->bo); \
/* Emit a 64-bit buffer address (+ optional memory-address-attributes DW)
 * into the BCS batch. buf_bo == NULL emits zero DWs instead of a reloc;
 * is_target selects the write domain; "ma" appends the MOCS attributes DW. */
84 #define OUT_BUFFER_X(buf_bo, is_target, ma) do { \
86 OUT_BCS_RELOC64(batch, \
88 I915_GEM_DOMAIN_INSTRUCTION, \
89 is_target ? I915_GEM_DOMAIN_INSTRUCTION : 0, \
92 OUT_BCS_BATCH(batch, 0); \
93 OUT_BCS_BATCH(batch, 0); \
96 OUT_BCS_BATCH(batch, i965->intel.mocs_state); \
/* Convenience wrappers: MA = with memory-address-attributes DW (3 DWs),
 * NMA = without it (2 DWs); TARGET = written by HW, REFERENCE = read-only. */
99 #define OUT_BUFFER_MA_TARGET(buf_bo) OUT_BUFFER_X(buf_bo, 1, 1)
100 #define OUT_BUFFER_MA_REFERENCE(buf_bo) OUT_BUFFER_X(buf_bo, 0, 1)
101 #define OUT_BUFFER_NMA_TARGET(buf_bo) OUT_BUFFER_X(buf_bo, 1, 0)
102 #define OUT_BUFFER_NMA_REFERENCE(buf_bo) OUT_BUFFER_X(buf_bo, 0, 0)
/* GEN8+ padded surface-state layout; binding table lives after all
 * media surface-state entries. */
105 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
106 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
107 #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
109 #define HCP_SOFTWARE_SKYLAKE 1
111 #define NUM_HCPE_KERNEL 2
/* VME inter prediction mode/shape encodings packed into the MB mode word. */
113 #define INTER_MODE_MASK 0x03
114 #define INTER_8X8 0x03
115 #define INTER_16X8 0x01
116 #define INTER_8X16 0x02
117 #define SUBMB_SHAPE_MASK 0x00FF00
119 #define INTER_MV8 (4 << 20)
120 #define INTER_MV32 (6 << 20)
/*
 * Fill a VAQMatrixBufferHEVC with the spec's "flat" default quantization
 * matrices: every scaling-list entry (4x4/8x8/16x16/32x32 and the 16x16/32x32
 * DC coefficients) is set to 16.
 */
127 hevc_gen_default_iq_matrix_encoder(VAQMatrixBufferHEVC *iq_matrix)
130     memset(&iq_matrix->scaling_lists_4x4, 16, sizeof(iq_matrix->scaling_lists_4x4));
133     memset(&iq_matrix->scaling_lists_8x8, 16, sizeof(iq_matrix->scaling_lists_8x8));
136     memset(&iq_matrix->scaling_lists_16x16, 16, sizeof(iq_matrix->scaling_lists_16x16));
139     memset(&iq_matrix->scaling_lists_32x32, 16, sizeof(iq_matrix->scaling_lists_32x32));
141     /* Flat_16x16_dc_16 */
142     memset(&iq_matrix->scaling_list_dc_16x16, 16, sizeof(iq_matrix->scaling_list_dc_16x16));
144     /* Flat_32x32_dc_16 */
145     memset(&iq_matrix->scaling_list_dc_32x32, 16, sizeof(iq_matrix->scaling_list_dc_32x32));
148 /* HEVC picture and slice state related */
/*
 * Emit HCP_PIPE_MODE_SELECT, configuring the HCP fixed-function pipe for
 * HEVC encoding. KBL/GLK use the longer 6-DW command form; earlier gen9
 * parts (e.g. SKL) use the 4-DW form.
 */
151 gen9_hcpe_pipe_mode_select(VADriverContextP ctx,
153 struct intel_encoder_context *encoder_context)
155 struct i965_driver_data *i965 = i965_driver_data(ctx);
156 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* Only HEVC is supported by this encoder path. */
158 assert(standard_select == HCP_CODEC_HEVC);
160 if (IS_KBL(i965->intel.device_info) ||
161 IS_GLK(i965->intel.device_info)) {
162 BEGIN_BCS_BATCH(batch, 6);
164 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
166 BEGIN_BCS_BATCH(batch, 4);
168 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
/* DW1: codec standard, status/error reporting off, encode direction. */
172 (standard_select << 5) |
173 (0 << 3) | /* disable Pic Status / Error Report */
174 HCP_CODEC_SELECT_ENCODE);
175 OUT_BCS_BATCH(batch, 0);
176 OUT_BCS_BATCH(batch, 0);
/* Extra reserved DWs for the longer KBL/GLK command form. */
178 if (IS_KBL(i965->intel.device_info) ||
179 IS_GLK(i965->intel.device_info)) {
180 OUT_BCS_BATCH(batch, 0);
181 OUT_BCS_BATCH(batch, 0);
184 ADVANCE_BCS_BATCH(batch);
/*
 * Emit two HCP_SURFACE_STATE commands: one for the reconstructed (decoded)
 * picture surface (surface id 1) and one for the source input picture
 * (surface id 0). Uses P010 format when either luma or chroma bit depth
 * exceeds 8, otherwise planar 4:2:0 8-bit (NV12-style).
 */
188 gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state,
189                         struct intel_encoder_context *encoder_context)
191     struct intel_batchbuffer *batch = encoder_context->base.batch;
192     struct object_surface *obj_surface = encode_state->reconstructed_object;
193     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
194     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
195     unsigned int surface_format = SURFACE_FORMAT_PLANAR_420_8;
198     unsigned int y_cb_offset;
/* >8-bit content must be backed by a P010 surface. */
202     if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
203         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0)) {
204         assert(obj_surface->fourcc == VA_FOURCC_P010);
205         surface_format = SURFACE_FORMAT_P010;
208     y_cb_offset = obj_surface->y_cb_offset;
/* Surface state for the reconstructed picture (surface id 1). */
210     BEGIN_BCS_BATCH(batch, 3);
211     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
213                   (1 << 28) |                   /* surface id */
214                   (mfc_context->surface_state.w_pitch - 1));    /* pitch - 1 */
216                   surface_format << 28 |
218     ADVANCE_BCS_BATCH(batch);
/* Surface state for the uncompressed source picture (surface id 0). */
220     BEGIN_BCS_BATCH(batch, 3);
221     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
223                   (0 << 28) |                   /* surface id */
224                   (mfc_context->surface_state.w_pitch - 1));    /* pitch - 1 */
226                   surface_format << 28 |
228     ADVANCE_BCS_BATCH(batch);
/*
 * Emit HCP_PIPE_BUF_ADDR_STATE: programs every buffer address the HCP pipe
 * needs for one frame — reconstructed output, deblocking/metadata/SAO
 * row-store buffers, the per-frame MV temporal buffer, the reference frame
 * list, the source picture, and the collocated MV buffers. KBL/GLK use the
 * longer 104-DW form; earlier gen9 uses 95 DWs.
 */
232 gen9_hcpe_pipe_buf_addr_state(VADriverContextP ctx, struct encode_state *encode_state,
233                               struct intel_encoder_context *encoder_context)
235     struct i965_driver_data *i965 = i965_driver_data(ctx);
236     struct intel_batchbuffer *batch = encoder_context->base.batch;
237     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
238     struct object_surface *obj_surface;
239     GenHevcSurface *hcpe_hevc_surface;
243     if (IS_KBL(i965->intel.device_info) ||
244         IS_GLK(i965->intel.device_info)) {
245         BEGIN_BCS_BATCH(batch, 104);
247         OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
249         BEGIN_BCS_BATCH(batch, 95);
251         OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));
/* The reconstructed surface must exist and carry its per-surface private
 * HEVC data (which owns the MV temporal bo). */
254     obj_surface = encode_state->reconstructed_object;
255     assert(obj_surface && obj_surface->bo);
256     hcpe_hevc_surface = obj_surface->private_data;
257     assert(hcpe_hevc_surface && hcpe_hevc_surface->motion_vector_temporal_bo);
259     OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
260     OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
261     OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
262     OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
263     OUT_BUFFER_MA_TARGET(mfc_context->metadata_line_buffer.bo);         /* DW 13..15 */
264     OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
265     OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
266     OUT_BUFFER_MA_TARGET(mfc_context->sao_line_buffer.bo);              /* DW 22..24 */
267     OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_line_buffer.bo);         /* DW 25..27 */
268     OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_column_buffer.bo);       /* DW 28..30 */
269     OUT_BUFFER_MA_TARGET(hcpe_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
270     OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */
272     /* here only max 8 reference allowed */
273     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
274         bo = mfc_context->reference_surfaces[i].bo;
277             OUT_BUFFER_NMA_REFERENCE(bo);
279             OUT_BUFFER_NMA_REFERENCE(NULL);
281     OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */
283     OUT_BUFFER_MA_TARGET(mfc_context->uncompressed_picture_source.bo); /* DW 54..56, uncompressed picture source */
284     OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, ignore */
285     OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, ignore */
286     OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, ignore */
/* Collocated MV temporal buffers for reference frames (last slot unused here). */
288     for (i = 0; i < ARRAY_ELEMS(mfc_context->current_collocated_mv_temporal_buffer) - 1; i++) {
289         bo = mfc_context->current_collocated_mv_temporal_buffer[i].bo;
292             OUT_BUFFER_NMA_REFERENCE(bo);
294             OUT_BUFFER_NMA_REFERENCE(NULL);
296     OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */
298     OUT_BUFFER_MA_TARGET(NULL);    /* DW 83..85, ignore for HEVC */
299     OUT_BUFFER_MA_TARGET(NULL);    /* DW 86..88, ignore for HEVC */
300     OUT_BUFFER_MA_TARGET(NULL);    /* DW 89..91, ignore for HEVC */
301     OUT_BUFFER_MA_TARGET(NULL);    /* DW 92..94, ignore for HEVC */
/* Trailing reserved DWs only present in the KBL/GLK command form. */
303     if (IS_KBL(i965->intel.device_info) ||
304         IS_GLK(i965->intel.device_info)) {
305         for (i = 0; i < 9; i++)
306             OUT_BCS_BATCH(batch, 0);
309     ADVANCE_BCS_BATCH(batch);
/*
 * Emit HCP_IND_OBJ_BASE_ADDR_STATE: programs the indirect CU object buffer
 * (input CU records produced by VME) and the PAK-BSE output buffer window
 * (start offset and upper bound) that receives the compressed bitstream.
 */
313 gen9_hcpe_ind_obj_base_addr_state(VADriverContextP ctx,
314                                   struct intel_encoder_context *encoder_context)
316     struct i965_driver_data *i965 = i965_driver_data(ctx);
317     struct intel_batchbuffer *batch = encoder_context->base.batch;
318     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
321     BEGIN_BCS_BATCH(batch, 14);
323     OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
324     OUT_BUFFER_MA_REFERENCE(NULL);                 /* DW 1..3 igonre for encoder*/
325     OUT_BUFFER_NMA_REFERENCE(NULL);                /* DW 4..5, Upper Bound */
326     OUT_BUFFER_MA_TARGET(mfc_context->hcp_indirect_cu_object.bo); /* DW 6..8, CU */
327     /* DW 9..11, PAK-BSE */
328     OUT_BCS_RELOC64(batch,
329                     mfc_context->hcp_indirect_pak_bse_object.bo,
330                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
331                     mfc_context->hcp_indirect_pak_bse_object.offset);
332     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* DW 12..13: PAK-BSE upper bound (end of the writable bitstream region). */
333     OUT_BCS_RELOC64(batch,
334                     mfc_context->hcp_indirect_pak_bse_object.bo,
335                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
336                     mfc_context->hcp_indirect_pak_bse_object.end_offset);
338     ADVANCE_BCS_BATCH(batch);
/*
 * Emit one HCP_FQM_STATE command (34 DWs): uploads a forward quantizer
 * matrix for one (size-id, color-component, intra/inter) combination.
 * fqm points at fqm_length 32-bit words; the remainder of the 32-word
 * payload is zero-filled.
 */
342 gen9_hcpe_fqm_state(VADriverContextP ctx,
349                     struct intel_encoder_context *encoder_context)
351     struct intel_batchbuffer *batch = encoder_context->base.batch;
352     unsigned int fqm_buffer[32];
354     assert(fqm_length <= 32);
355     assert(sizeof(*fqm) == 4);
356     memset(fqm_buffer, 0, sizeof(fqm_buffer));
357     memcpy(fqm_buffer, fqm, fqm_length * 4);
359     BEGIN_BCS_BATCH(batch, 34);
361     OUT_BCS_BATCH(batch, HCP_FQM_STATE | (34 - 2));
/* DW1: size id / color component / intra-inter selector bitfield. */
364                   color_component << 3 |
/* DW2..33: the 128-byte FQM payload. */
367     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
369     ADVANCE_BCS_BATCH(batch);
/*
 * Upload flat forward-quantization matrices for all transform sizes and
 * components. 0x10001000 packs two 16-bit values of 0x1000 (the flat FQM
 * entry); one gen9_hcpe_fqm_state() call is made per size/component/
 * intra-inter combination (arguments elided in this view).
 */
374 gen9_hcpe_hevc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
376     unsigned int qm[32] = {
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000
387     gen9_hcpe_fqm_state(ctx,
391     gen9_hcpe_fqm_state(ctx,
395     gen9_hcpe_fqm_state(ctx,
399     gen9_hcpe_fqm_state(ctx,
403     gen9_hcpe_fqm_state(ctx,
407     gen9_hcpe_fqm_state(ctx,
411     gen9_hcpe_fqm_state(ctx,
415     gen9_hcpe_fqm_state(ctx,
/*
 * Emit one HCP_QM_STATE command (18 DWs): uploads an (inverse) quantizer
 * matrix for one (size-id, color-component, intra/inter) combination.
 * qm points at qm_length 32-bit words; the rest of the 16-word payload is
 * zero-filled. Mirrors gen9_hcpe_fqm_state() but with a 64-byte payload.
 */
422 gen9_hcpe_qm_state(VADriverContextP ctx,
429                    struct intel_encoder_context *encoder_context)
431     struct intel_batchbuffer *batch = encoder_context->base.batch;
432     unsigned int qm_buffer[16];
434     assert(qm_length <= 16);
435     assert(sizeof(*qm) == 4);
436     memset(qm_buffer, 0, sizeof(qm_buffer));
437     memcpy(qm_buffer, qm, qm_length * 4);
439     BEGIN_BCS_BATCH(batch, 18);
441     OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
/* DW1: size id / color component / intra-inter selector bitfield. */
444                   color_component << 3 |
/* DW2..17: the 64-byte QM payload. */
447     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
449     ADVANCE_BCS_BATCH(batch);
/*
 * Upload flat quantization matrices (every byte 0x10 == 16, the HEVC flat
 * default) for all scaling-list entries: 6 lists each for 4x4, 8x8 and
 * 16x16, plus 2 lists for 32x32 — per-call arguments elided in this view.
 */
453 gen9_hcpe_hevc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
458     unsigned int qm[16] = {
459         0x10101010, 0x10101010, 0x10101010, 0x10101010,
460         0x10101010, 0x10101010, 0x10101010, 0x10101010,
461         0x10101010, 0x10101010, 0x10101010, 0x10101010,
462         0x10101010, 0x10101010, 0x10101010, 0x10101010
465     for (i = 0; i < 6; i++) {
466         gen9_hcpe_qm_state(ctx,
472     for (i = 0; i < 6; i++) {
473         gen9_hcpe_qm_state(ctx,
479     for (i = 0; i < 6; i++) {
480         gen9_hcpe_qm_state(ctx,
486     for (i = 0; i < 2; i++) {
487         gen9_hcpe_qm_state(ctx,
/*
 * Emit HCP_PIC_STATE: per-picture HEVC encode configuration derived from
 * the VA sequence and picture parameter buffers — picture dimensions in
 * min-CBs, CB/TB size ranges, PCM config, the main tool-enable bitfield,
 * bit depths and QP offsets, plus the PAK rate-control guard rails
 * (max LCU bits, frame bitrate min/max and delta-QP clamping tables).
 * KBL/GLK use the 31-DW command form; earlier gen9 uses 19 DWs.
 */
495 gen9_hcpe_hevc_pic_state(VADriverContextP ctx, struct encode_state *encode_state,
496                          struct intel_encoder_context *encoder_context)
498     struct i965_driver_data *i965 = i965_driver_data(ctx);
499     struct intel_batchbuffer *batch = encoder_context->base.batch;
500     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
501     VAEncPictureParameterBufferHEVC *pic_param ;
502     VAEncSequenceParameterBufferHEVC *seq_param ;
504     int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
505     int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
509      * When not present, the value of loop_filter_across_tiles_enabled_flag
510      * is inferred to be equal to 1.
512     int loop_filter_across_tiles_enabled_flag = 0;
513     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
514     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
/* CTB geometry and the worst-case raw-CTB bit budget used for maxctubits. */
516     int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
517     int log2_ctb_size = seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
518     int ctb_size = 1 << log2_ctb_size;
519     double rawctubits = 8 * 3 * ctb_size * ctb_size / 2.0;
520     int maxctubits = (int)(5 * rawctubits / 3) ;
521     double bitrate = (double)encoder_context->brc.bits_per_second[0];
522     double framebitrate = bitrate / 32 / 8; //32 byte unit
523     int minframebitrate = 0;//(int) (framebitrate * 3 / 10);
524     int maxframebitrate = (int)(framebitrate * 10 / 10);
525     int maxdeltaframebitrate = 0x1c5c; //(int) (framebitrate * 1/ 10);
526     int mindeltaframebitrate = 0; //(int) (framebitrate * 1/ 10);
527     int minframesize = 0;//(int)(rawframebits * 1/50);
529     if (seq_param->seq_fields.bits.pcm_enabled_flag) {
530         max_pcm_size_minus3 = seq_param->log2_max_pcm_luma_coding_block_size_minus3;
531         min_pcm_size_minus3 = seq_param->log2_min_pcm_luma_coding_block_size_minus3;
532         pcm_sample_bit_depth_luma_minus1 = (seq_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
533         pcm_sample_bit_depth_chroma_minus1 = (seq_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
535         max_pcm_size_minus3 = MIN(seq_param->log2_min_luma_coding_block_size_minus3 + seq_param->log2_diff_max_min_luma_coding_block_size, 2);
538     if (pic_param->pic_fields.bits.tiles_enabled_flag)
539         loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;
541     /* set zero for encoder */
542     loop_filter_across_tiles_enabled_flag = 0;
544     if (IS_KBL(i965->intel.device_info) ||
545         IS_GLK(i965->intel.device_info)) {
546         BEGIN_BCS_BATCH(batch, 31);
548         OUT_BCS_BATCH(batch, HCP_PIC_STATE | (31 - 2));
550         BEGIN_BCS_BATCH(batch, 19);
552         OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));
/* DW1: frame size in units of minimum coding blocks. */
556                   mfc_context->pic_size.picture_height_in_min_cb_minus1 << 16 |
558                   mfc_context->pic_size.picture_width_in_min_cb_minus1);
/* DW2: PCM block-size range and CB/TB log2 size ranges. */
560                   max_pcm_size_minus3 << 10 |
561                   min_pcm_size_minus3 << 8 |
562                   (seq_param->log2_min_transform_block_size_minus2 +
563                    seq_param->log2_diff_max_min_transform_block_size) << 6 |
564                   seq_param->log2_min_transform_block_size_minus2 << 4 |
565                   (seq_param->log2_min_luma_coding_block_size_minus3 +
566                    seq_param->log2_diff_max_min_luma_coding_block_size) << 2 |
567                   seq_param->log2_min_luma_coding_block_size_minus3);
568     OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
/* DW4: the main per-picture tool-enable bitfield. */
570                   ((IS_KBL(i965->intel.device_info) || IS_GLK(i965->intel.device_info)) ?
571                    1 : 0) << 27 | /* CU packet structure is 0 for SKL */
572                   seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
573                   pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
574                   seq_param->seq_fields.bits.amp_enabled_flag << 23 |
575                   pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
576                   0 << 21 | /* 0 for encoder !(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD)*/
577                   0 << 20 | /* 0 for encoder !!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_FIELD_PIC)*/
578                   pic_param->pic_fields.bits.weighted_pred_flag << 19 |
579                   pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
580                   pic_param->pic_fields.bits.tiles_enabled_flag << 17 |    /* 0 for encoder */
581                   pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
582                   loop_filter_across_tiles_enabled_flag << 15 |
583                   pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 |  /* 0 for encoder */
584                   pic_param->log2_parallel_merge_level_minus2 << 10 |   /* 0 for encoder */
585                   pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 |    /* 0 for encoder */
586                   seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8 |
587                   (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 |     /* 0 for encoder */
588                   pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 |   /* 0 for encoder */
589                   seq_param->seq_fields.bits.pcm_enabled_flag << 4 |
590                   seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag << 3 |    /* 0 for encoder */
/* DW5: bit depths, PCM sample depths, transform hierarchy, chroma QP offsets. */
593                   seq_param->seq_fields.bits.bit_depth_luma_minus8 << 27 |    /* 10 bit for KBL+*/
594                   seq_param->seq_fields.bits.bit_depth_chroma_minus8 << 24 |    /* 10 bit for KBL+ */
595                   pcm_sample_bit_depth_luma_minus1 << 20 |
596                   pcm_sample_bit_depth_chroma_minus1 << 16 |
597                   seq_param->max_transform_hierarchy_depth_inter << 13 |    /* for encoder */
598                   seq_param->max_transform_hierarchy_depth_intra << 10 |    /* for encoder */
599                   (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
600                   (pic_param->pps_cb_qp_offset & 0x1f));
602                   0 << 29 | /* must be 0 for encoder */
603                   maxctubits); /* DW 6, max LCU bit size allowed for encoder */
605                   0 << 31 | /* frame bitrate max unit */
606                   maxframebitrate); /* DW 7, frame bitrate max 0:13 */
608                   0 << 31 | /* frame bitrate min unit */
609                   minframebitrate); /* DW 8, frame bitrate min 0:13 */
611                   maxdeltaframebitrate << 16 | /* frame bitrate max delta ,help to select deltaQP of slice*/
612                   mindeltaframebitrate); /* DW 9,(0,14) frame bitrate min delta ,help to select deltaQP of slice*/
/* DW10..17: hardware delta-QP clamping tables (max/min and their ranges). */
613     OUT_BCS_BATCH(batch, 0x07050402);   /* DW 10, frame delta qp max */
614     OUT_BCS_BATCH(batch, 0x0d0b0908);
615     OUT_BCS_BATCH(batch, 0);    /* DW 12, frame delta qp min */
616     OUT_BCS_BATCH(batch, 0);
617     OUT_BCS_BATCH(batch, 0x04030200);   /* DW 14, frame delta qp max range */
618     OUT_BCS_BATCH(batch, 0x100c0806);   /* DW 15 */
619     OUT_BCS_BATCH(batch, 0x04030200);   /* DW 16, frame delta qp min range */
620     OUT_BCS_BATCH(batch, 0x100c0806);
623                   minframesize);    /* DW 18, min frame size units */
/* Trailing reserved DWs only present in the KBL/GLK command form. */
625     if (IS_KBL(i965->intel.device_info) ||
626         IS_GLK(i965->intel.device_info)) {
629         for (i = 0; i < 12; i++)
630             OUT_BCS_BATCH(batch, 0);
633     ADVANCE_BCS_BATCH(batch);
/*
 * Emit an HCP_INSERT_PAK_OBJECT carrying raw header/tail bitstream data
 * inline in the batch (e.g. SPS/PPS/slice headers). data_bits_in_last_dw==0
 * is treated as a full 32-bit last DW. emulation_flag requests emulation-
 * prevention byte insertion starting after skip_emul_byte_count bytes.
 * NOTE(review): "lenght_in_dws" is a long-standing typo for length_in_dws;
 * renaming is unsafe from this partial view of the file.
 */
638 gen9_hcpe_hevc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
639                              unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
640                              int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
641                              struct intel_batchbuffer *batch)
/* Fall back to the encoder's main batch when none is supplied. */
644         batch = encoder_context->base.batch;
646     if (data_bits_in_last_dw == 0)
647         data_bits_in_last_dw = 32;
649     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
651     OUT_BCS_BATCH(batch, HCP_INSERT_PAK_OBJECT | (lenght_in_dws + 2 - 2));
653                   (0 << 31) |   /* inline payload */
654                   (0 << 16) |   /* always start at offset 0 */
655                   (0 << 15) |   /* HeaderLengthExcludeFrmSize */
656                   (data_bits_in_last_dw << 8) |
657                   (skip_emul_byte_count << 4) |
658                   (!!emulation_flag << 3) |
659                   ((!!is_last_header) << 2) |
660                   ((!!is_end_of_slice) << 1) |
661                   (0 << 0));    /* Reserved */
662     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
664     ADVANCE_BCS_BATCH(batch);
/*
 * Pack one reference-picture entry for HCP_REF_IDX_STATE from a
 * VAPictureHEVC: bit 6 = long-term flag, bit 5 = frame (not field) flag,
 * bits 4:1 = frame store id, bit 0 = bottom-field flag.
 */
669 intel_get_ref_idx_state_1(VAPictureHEVC *va_pic, unsigned int frame_store_id)
671     unsigned int is_long_term =
672         !!(va_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE);
/* Top field == not bottom field; a frame reference sets neither exclusively. */
673     unsigned int is_top_field =
674         !!!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
675     unsigned int is_bottom_field =
676         !!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
678     return ((is_long_term                         << 6) |
679             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
680             (frame_store_id                       << 1) |
681             ((is_top_field ^ 1) & is_bottom_field));
/*
 * Emit HCP_REF_IDX_STATE for one reference list (list 0 or 1): looks up the
 * reference object chosen by VME in the DPB, then writes one 16-entry table
 * of reference attributes (long/short term flag and the POC delta to the
 * current picture, clamped to signed 8 bits). Warns (once) and bails on an
 * out-of-range ref index or a reference not found in the DPB.
 */
685 gen9_hcpe_ref_idx_state_1(struct intel_batchbuffer *batch,
687                           struct intel_encoder_context *encoder_context,
688                           struct encode_state *encode_state)
691     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
692     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
693     uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
694     VAPictureHEVC *ref_list = (list ? slice_param->ref_pic_list1 : slice_param->ref_pic_list0);
695     struct gen6_vme_context *vme_context = encoder_context->vme_context;
696     struct object_surface *obj_surface;
699     int ref_idx_l0 = (vme_context->ref_index_in_mb[list] & 0xff);
701     if (ref_idx_l0 > 3) {
702         WARN_ONCE("ref_idx_l0 is out of range\n");
/* Locate the VME-selected reference surface in the DPB reference list. */
706     obj_surface = vme_context->used_reference_objects[list];
708     for (i = 0; i < 16; i++) {
710             obj_surface == encode_state->reference_objects[i]) {
715     if (frame_index == -1) {
716         WARN_ONCE("RefPicList 0 or 1 is not found in DPB!\n");
719     BEGIN_BCS_BATCH(batch, 18);
721     OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
/* DW1: list id and number of active references minus one. */
723                   num_ref_minus1 << 1 |
/* DW2..17: one DW per possible reference slot (unused slots are zero). */
726     for (i = 0; i < 16; i++) {
727         if (i < MIN((num_ref_minus1 + 1), 15)) {
728             VAPictureHEVC *ref_pic = &ref_list[i];
729             VAPictureHEVC *curr_pic = &pic_param->decoded_curr_pic;
732                           1 << 15 |         /* bottom_field_flag 0 */
733                           0 << 14 |         /* field_pic_flag 0 */
734                           !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 |  /* short term is 1 */
735                           0 << 12 |         /* disable WP */
736                           0 << 11 |         /* disable WP */
738                           (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
740             OUT_BCS_BATCH(batch, 0);
744     ADVANCE_BCS_BATCH(batch);
/*
 * Emit the reference-index state needed by the first slice's type:
 * I slices emit nothing, P slices program list 0 only, and B slices
 * program both list 0 and list 1.
 */
748 intel_hcpe_hevc_ref_idx_state(VADriverContextP ctx,
749                               struct encode_state *encode_state,
750                               struct intel_encoder_context *encoder_context
753     struct intel_batchbuffer *batch = encoder_context->base.batch;
754     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
/* Intra slices carry no reference lists. */
756     if (slice_param->slice_type == HEVC_SLICE_I)
759     gen9_hcpe_ref_idx_state_1(batch, 0, encoder_context, encode_state);
/* P slices stop after list 0; only B slices also need list 1. */
761     if (slice_param->slice_type == HEVC_SLICE_P)
764     gen9_hcpe_ref_idx_state_1(batch, 1, encoder_context, encode_state);
/*
 * Emit HCP_SLICE_STATE for one slice: slice start/end CTB coordinates,
 * slice QP and chroma QP offsets, the slice-level tool flags (SAO,
 * deblocking, temporal MVP, weighted prediction denominators, merge
 * candidates, CABAC init), and the PAK-BSE insertion controls. Multi-slice
 * encodes must start each slice at a CTB-row boundary (asserted below).
 * KBL/GLK use the 11-DW command form; earlier gen9 uses 9 DWs.
 */
768 gen9_hcpe_hevc_slice_state(VADriverContextP ctx,
769                            VAEncPictureParameterBufferHEVC *pic_param,
770                            VAEncSliceParameterBufferHEVC *slice_param,
771                            struct encode_state *encode_state,
772                            struct intel_encoder_context *encoder_context,
773                            struct intel_batchbuffer *batch)
775     struct i965_driver_data *i965 = i965_driver_data(ctx);
776     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
777     int slice_type = slice_param->slice_type;
/* Derive CTB geometry and whether this slice ends the picture. */
779     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
780     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
781     int ctb_size = 1 << log2_ctb_size;
782     int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
783     int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
784     int last_slice = (((slice_param->slice_segment_address + slice_param->num_ctu_in_slice) == (width_in_ctb * height_in_ctb)) ? 1 : 0);
786     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
788     slice_hor_pos = slice_param->slice_segment_address % width_in_ctb;
789     slice_ver_pos = slice_param->slice_segment_address / width_in_ctb;
791     next_slice_hor_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) % width_in_ctb;
792     next_slice_ver_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) / width_in_ctb;
794     /* only support multi slice begin from row start address */
795     assert((slice_param->slice_segment_address % width_in_ctb) == 0);
/* For the final slice, point "next slice" past the bottom of the picture
 * (single-slice case) or wrap it to (0,0). */
797     if (last_slice == 1) {
798         if (slice_param->slice_segment_address == 0) {
799             next_slice_hor_pos = 0;
800             next_slice_ver_pos = height_in_ctb;
802             next_slice_hor_pos = 0;
803             next_slice_ver_pos = 0;
807     if (IS_KBL(i965->intel.device_info) ||
808         IS_GLK(i965->intel.device_info)) {
809         BEGIN_BCS_BATCH(batch, 11);
811         OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (11 - 2));
813         BEGIN_BCS_BATCH(batch, 9);
815         OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));
/* DW1/DW2: current and next slice start positions in CTB units. */
819                   slice_ver_pos << 16 |
822                   next_slice_ver_pos << 16 |
/* DW3: slice QP (pic_init_qp + delta), chroma QP offsets, slice flags. */
825                   (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
826                   (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
827                   (pic_param->pic_init_qp + slice_param->slice_qp_delta) << 6 |
828                   slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag << 5 |
829                   slice_param->slice_fields.bits.dependent_slice_segment_flag << 4 |
/* DW4: merge candidates, CABAC init, WP denominators, SAO/deblocking flags. */
834                   (slice_param->max_num_merge_cand - 1) << 23 |
835                   slice_param->slice_fields.bits.cabac_init_flag << 22 |
836                   slice_param->luma_log2_weight_denom << 19 |
837                   (slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) << 16 |
838                   slice_param->slice_fields.bits.collocated_from_l0_flag << 15 |
839                   (slice_type != HEVC_SLICE_B) << 14 |
840                   slice_param->slice_fields.bits.mvd_l1_zero_flag << 13 |
841                   slice_param->slice_fields.bits.slice_sao_luma_flag << 12 |
842                   slice_param->slice_fields.bits.slice_sao_chroma_flag << 11 |
843                   slice_param->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag << 10 |
844                   (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
845                   (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
846                   slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag);
847     OUT_BCS_BATCH(batch, 0); /* DW 5 ,ignore for encoder.*/
/* PAK-BSE stream composition controls (header/data/tail insertion). */
853                   1 << 10 |   /* header insertion enable */
854                   1 << 9  |   /* slice data enable */
855                   1 << 8  |   /* tail insertion enable, must at end of frame, not slice */
856                   1 << 2  |   /* RBSP or EBSP, EmulationByteSliceInsertEnable */
857                   1 << 1  |   /* cabacZeroWordInsertionEnable */
858                   0);         /* Ignored for decoding */
859     OUT_BCS_BATCH(batch, 0); /* PAK-BSE data start offset */
/* Extra reserved DWs for the longer KBL/GLK command form. */
861     if (IS_KBL(i965->intel.device_info) ||
862         IS_GLK(i965->intel.device_info)) {
863         OUT_BCS_BATCH(batch, 0);
864         OUT_BCS_BATCH(batch, 0);
867     ADVANCE_BCS_BATCH(batch);
870 /* HEVC pipe line related */
/*
 * Program all per-picture HCP state for one frame, in hardware order:
 * pipe mode select, surface state, pipe buffer addresses, indirect object
 * base addresses, QM/FQM tables, picture state, and reference index state.
 * Most steps dispatch through function pointers on the mfc context so
 * gen-specific variants can be plugged in.
 */
871 static void gen9_hcpe_hevc_pipeline_picture_programing(VADriverContextP ctx,
872                                                        struct encode_state *encode_state,
873                                                        struct intel_encoder_context *encoder_context)
875     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
877     mfc_context->pipe_mode_select(ctx, HCP_CODEC_HEVC, encoder_context);
878     mfc_context->set_surface_state(ctx, encode_state, encoder_context);
879     gen9_hcpe_pipe_buf_addr_state(ctx, encode_state, encoder_context);
880     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
882     mfc_context->qm_state(ctx, encoder_context);
883     mfc_context->fqm_state(ctx, encoder_context);
884     mfc_context->pic_state(ctx, encode_state, encoder_context);
885     intel_hcpe_hevc_ref_idx_state(ctx, encode_state, encoder_context);
888 static void gen9_hcpe_init(VADriverContextP ctx,
889 struct encode_state *encode_state,
890 struct intel_encoder_context *encoder_context)
893 struct i965_driver_data *i965 = i965_driver_data(ctx);
894 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
895 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
896 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
899 int slice_batchbuffer_size;
900 int slice_type = slice_param->slice_type;
901 int is_inter = (slice_type != HEVC_SLICE_I);
903 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
904 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
905 int ctb_size = 1 << log2_ctb_size;
906 int cu_size = 1 << log2_cu_size;
908 int width_in_ctb = ALIGN(pSequenceParameter->pic_width_in_luma_samples , ctb_size) / ctb_size;
909 int height_in_ctb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, ctb_size) / ctb_size;
910 int width_in_cu = ALIGN(pSequenceParameter->pic_width_in_luma_samples , cu_size) / cu_size;
911 int height_in_cu = ALIGN(pSequenceParameter->pic_height_in_luma_samples, cu_size) / cu_size;
912 int width_in_mb = ALIGN(pSequenceParameter->pic_width_in_luma_samples , 16) / 16;
913 int height_in_mb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, 16) / 16;
915 int num_cu_record = 64;
918 if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
919 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
922 if (log2_ctb_size == 5) num_cu_record = 16;
923 else if (log2_ctb_size == 4) num_cu_record = 4;
924 else if (log2_ctb_size == 6) num_cu_record = 64;
926 /* frame size in samples, cu,ctu, mb */
927 mfc_context->pic_size.picture_width_in_samples = pSequenceParameter->pic_width_in_luma_samples;
928 mfc_context->pic_size.picture_height_in_samples = pSequenceParameter->pic_height_in_luma_samples;
929 mfc_context->pic_size.ctb_size = ctb_size;
930 mfc_context->pic_size.picture_width_in_ctbs = width_in_ctb;
931 mfc_context->pic_size.picture_height_in_ctbs = height_in_ctb;
932 mfc_context->pic_size.min_cb_size = cu_size;
933 mfc_context->pic_size.picture_width_in_min_cb_minus1 = width_in_cu - 1;
934 mfc_context->pic_size.picture_height_in_min_cb_minus1 = height_in_cu - 1;
935 mfc_context->pic_size.picture_width_in_mbs = width_in_mb;
936 mfc_context->pic_size.picture_height_in_mbs = height_in_mb;
938 slice_batchbuffer_size = 64 * width_in_ctb * width_in_ctb + 4096 +
939 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
941 /*Encode common setup for HCP*/
943 dri_bo_unreference(mfc_context->deblocking_filter_line_buffer.bo);
944 mfc_context->deblocking_filter_line_buffer.bo = NULL;
946 dri_bo_unreference(mfc_context->deblocking_filter_tile_line_buffer.bo);
947 mfc_context->deblocking_filter_tile_line_buffer.bo = NULL;
949 dri_bo_unreference(mfc_context->deblocking_filter_tile_column_buffer.bo);
950 mfc_context->deblocking_filter_tile_column_buffer.bo = NULL;
953 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
954 mfc_context->uncompressed_picture_source.bo = NULL;
957 dri_bo_unreference(mfc_context->metadata_line_buffer.bo);
958 mfc_context->metadata_line_buffer.bo = NULL;
960 dri_bo_unreference(mfc_context->metadata_tile_line_buffer.bo);
961 mfc_context->metadata_tile_line_buffer.bo = NULL;
963 dri_bo_unreference(mfc_context->metadata_tile_column_buffer.bo);
964 mfc_context->metadata_tile_column_buffer.bo = NULL;
967 dri_bo_unreference(mfc_context->sao_line_buffer.bo);
968 mfc_context->sao_line_buffer.bo = NULL;
970 dri_bo_unreference(mfc_context->sao_tile_line_buffer.bo);
971 mfc_context->sao_tile_line_buffer.bo = NULL;
973 dri_bo_unreference(mfc_context->sao_tile_column_buffer.bo);
974 mfc_context->sao_tile_column_buffer.bo = NULL;
976 /* mv temporal buffer */
977 for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
978 if (mfc_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
979 dri_bo_unreference(mfc_context->current_collocated_mv_temporal_buffer[i].bo);
980 mfc_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
984 for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
985 if (mfc_context->reference_surfaces[i].bo != NULL)
986 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
987 mfc_context->reference_surfaces[i].bo = NULL;
990 /* indirect data CU recording */
991 dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
992 mfc_context->hcp_indirect_cu_object.bo = NULL;
994 dri_bo_unreference(mfc_context->hcp_indirect_pak_bse_object.bo);
995 mfc_context->hcp_indirect_pak_bse_object.bo = NULL;
997 /* Current internal buffer for HCP */
999 size = ALIGN(pSequenceParameter->pic_width_in_luma_samples, 32) >> size_shift;
1001 ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_line_buffer), "line buffer", size);
1002 ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);
1004 size = ALIGN(pSequenceParameter->pic_height_in_luma_samples + 6 * width_in_ctb, 32) >> size_shift;
1006 ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);
1009 size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 188 + 9 * width_in_ctb + 1023) >> 9;
1011 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
1013 size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 172 + 9 * width_in_ctb + 1023) >> 9;
1015 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
1017 size = (((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4) * 176 + 89 * width_in_ctb + 1023) >> 9;
1019 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
1021 size = (pSequenceParameter->pic_width_in_luma_samples + 8 * width_in_ctb + 1023) >> 9;
1023 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
1025 size = (pSequenceParameter->pic_width_in_luma_samples + 16 * width_in_ctb + 1023) >> 9;
1027 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
1029 size = (pSequenceParameter->pic_height_in_luma_samples + 8 * height_in_ctb + 1023) >> 9;
1031 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
1034 size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 3 * width_in_ctb), 16) >> size_shift;
1036 ALLOC_ENCODER_BUFFER((&mfc_context->sao_line_buffer), "sao line buffer", size);
1038 size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 6 * width_in_ctb), 16) >> size_shift;
1040 ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_line_buffer), "sao tile line buffer", size);
1042 size = ALIGN(((pSequenceParameter->pic_height_in_luma_samples >> 1) + 6 * height_in_ctb), 16) >> size_shift;
1044 ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_column_buffer), "sao tile column buffer", size);
1046 /////////////////////
1047 dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
1048 bo = dri_bo_alloc(i965->intel.bufmgr,
1049 "Indirect data CU Buffer",
1050 width_in_ctb * height_in_ctb * num_cu_record * 16 * 4,
1053 mfc_context->hcp_indirect_cu_object.bo = bo;
1055 /* to do pak bse object buffer */
1056 /* to do current collocated mv temporal buffer */
1058 dri_bo_unreference(mfc_context->hcp_batchbuffer_surface.bo);
1059 mfc_context->hcp_batchbuffer_surface.bo = NULL;
1061 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1062 mfc_context->aux_batchbuffer_surface.bo = NULL;
1064 if (mfc_context->aux_batchbuffer)
1065 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1067 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
1068 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
1069 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1070 mfc_context->aux_batchbuffer_surface.pitch = 16;
1071 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
1072 mfc_context->aux_batchbuffer_surface.size_block = 16;
1075 static VAStatus gen9_hcpe_run(VADriverContextP ctx,
1076 struct encode_state *encode_state,
1077 struct intel_encoder_context *encoder_context)
1079 struct intel_batchbuffer *batch = encoder_context->base.batch;
1081 intel_batchbuffer_flush(batch); //run the pipeline
1083 return VA_STATUS_SUCCESS;
1088 gen9_hcpe_stop(VADriverContextP ctx,
1089 struct encode_state *encode_state,
1090 struct intel_encoder_context *encoder_context,
1091 int *encoded_bits_size)
1093 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1094 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1095 VACodedBufferSegment *coded_buffer_segment;
1097 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1098 assert(vaStatus == VA_STATUS_SUCCESS);
1099 *encoded_bits_size = coded_buffer_segment->size * 8;
1100 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1102 return VA_STATUS_SUCCESS;
/*
 * Scan a packed-header byte stream for an Annex-B start code (00 00 01 or
 * 00 00 00 01) and compute how many leading bytes the HW must skip before
 * it starts emulation-prevention processing: leading zeros + start code +
 * NAL unit header byte(s).
 *
 * @buf:         packed header bytes
 * @bits_length: header length in bits (rounded up to dword granularity below)
 *
 * Returns the skip count (bounded by the HW limit, warned about if exceeded).
 */
1106 int intel_hevc_find_skipemulcnt(unsigned char *buf, int bits_length)
1110 int leading_zero_cnt, byte_length, zero_byte;
1114 #define NAL_UNIT_TYPE_MASK 0x7e
1115 #define HW_MAX_SKIP_LENGTH 15
/* Bits -> bytes, rounded up to a 32-bit boundary first. */
1117 byte_length = ALIGN(bits_length, 32) >> 3;
/* Count the zero bytes preceding the first 3- or 4-byte start code. */
1120 leading_zero_cnt = 0;
1122 for (i = 0; i < byte_length - 4; i++) {
1123 if (((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)) ||
1124 ((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 0) && (buf[i + 3] == 1))) {
1131 /* No start-prefix found: warn, but the header is inserted anyway. */
1132 WARN_ONCE("Invalid packed header data. "
1133 "Can't find the 000001 start_prefix code\n");
1136 i = leading_zero_cnt;
/* A 4-byte start code (00 00 00 01) contributes one extra zero byte. */
1139 if (!((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)))
1142 skip_cnt = leading_zero_cnt + zero_byte + 3;
1144 /* the unit header byte is accounted */
/* nal_unit_type lives in bits 6:1 of the first byte after the start code. */
1145 nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK;
1147 skip_cnt += 1; /* two bytes length of nal headers in hevc */
1149 if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
1150 /* more unit header bytes are accounted for MVC/SVC */
1153 if (skip_cnt > HW_MAX_SKIP_LENGTH) {
1154 WARN_ONCE("Too many leading zeros are padded for packed data. "
1155 "It is beyond the HW range.!!!\n");
1160 #ifdef HCP_SOFTWARE_SKYLAKE
/*
 * Emit one HCP_PAK_OBJECT command for a single LCU/CTB.
 *
 * @lcu_x, @lcu_y:           CTB coordinates (packed into dword 2)
 * @isLast_ctb:              non-zero marks the final CTB of the stream (bit 31)
 * @cu_count_in_lcu:         number of CUs recorded for this CTB (minus 1 encoded)
 * @split_coding_unit_flag:  precomputed CU split bitmap for this CTB
 *
 * Returns the command length in dwords that was emitted.
 */
1163 gen9_hcpe_hevc_pak_object(VADriverContextP ctx, int lcu_x, int lcu_y, int isLast_ctb,
1164 struct intel_encoder_context *encoder_context,
1165 int cu_count_in_lcu, unsigned int split_coding_unit_flag,
1166 struct intel_batchbuffer *batch)
1168 struct i965_driver_data *i965 = i965_driver_data(ctx);
1169 int len_in_dwords = 3;
/* KBL/GLK use a longer HCP_PAK_OBJECT layout; the two extra dwords are
 * emitted as zero further down. */
1171 if (IS_KBL(i965->intel.device_info) ||
1172 IS_GLK(i965->intel.device_info))
/* NULL batch means "use the encoder's base batchbuffer". */
1176 batch = encoder_context->base.batch;
1178 BEGIN_BCS_BATCH(batch, len_in_dwords);
/* Dword 0: opcode; hardware length field excludes the first two dwords. */
1180 OUT_BCS_BATCH(batch, HCP_PAK_OBJECT | (len_in_dwords - 2));
1181 OUT_BCS_BATCH(batch,
1182 (((isLast_ctb > 0) ? 1 : 0) << 31) | /* last ctb?*/
1183 ((cu_count_in_lcu - 1) << 24) | /* No motion vector */
1184 split_coding_unit_flag);
1186 OUT_BCS_BATCH(batch, (lcu_y << 16) | lcu_x); /* LCU for Y*/
1188 if (IS_KBL(i965->intel.device_info) ||
1189 IS_GLK(i965->intel.device_info)) {
1190 OUT_BCS_BATCH(batch, 0);
1191 OUT_BCS_BATCH(batch, 0);
1194 ADVANCE_BCS_BATCH(batch);
1196 return len_in_dwords;
/* Offsets (in dwords) into the per-MB VME output message, plus masks for
 * decoding the AVC-style mode decision that the shared VME kernel produces.
 * The HEVC PAK path below re-maps these AVC decisions onto HEVC CUs. */
1199 #define AVC_INTRA_RDO_OFFSET 4
1200 #define AVC_INTER_RDO_OFFSET 10
1201 #define AVC_INTER_MSG_OFFSET 8
1202 #define AVC_INTER_MV_OFFSET 48
1203 #define AVC_RDO_MASK 0xFFFF
/* Intra MB partition type, taken from msg[0] bits 5:4. */
1205 #define AVC_INTRA_MODE_MASK 0x30
1206 #define AVC_INTRA_16X16 0x00
1207 #define AVC_INTRA_8X8 0x01
1208 #define AVC_INTRA_4X4 0x02
/* Inter MB partition type, taken from msg[0] bits 1:0. */
1210 #define AVC_INTER_MODE_MASK 0x03
1211 #define AVC_INTER_8X8 0x03
1212 #define AVC_INTER_8X16 0x02
1213 #define AVC_INTER_16X8 0x01
1214 #define AVC_INTER_16X16 0x00
1215 #define AVC_SUBMB_SHAPE_MASK 0x00FF00
1217 /* VME output message, write back message */
1218 #define AVC_INTER_SUBMB_PRE_MODE_MASK 0x00ff0000
/* NOTE(review): identical redefinition of AVC_SUBMB_SHAPE_MASK above --
 * benign in C (same replacement list) but one of the two could be removed. */
1219 #define AVC_SUBMB_SHAPE_MASK 0x00FF00
/*
 * Fill one intra CU record in the HCP indirect-CU object from the VME
 * (AVC-style) output message.
 *
 * The VME kernels decide modes per 16x16 macroblock; here one MB maps to
 * one 16x16 CU (or four 8x8 CUs for the 4x4/NxN case), and the AVC intra
 * prediction modes are translated to HEVC angular/DC/planar modes via the
 * remap tables below.
 *
 * @qp:            QP written into the CU record
 * @msg:           per-MB VME output dwords for this macroblock
 * @ctb_x, @ctb_y: CTB coordinates; @cu_index selects the record slot inside
 *                 the CTB, @index selects the 8x8 sub-block within the MB
 */
1221 /* here 1 MB = 1CU = 16x16 */
1223 gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
1224 struct encode_state *encode_state,
1225 struct intel_encoder_context *encoder_context,
1226 int qp, unsigned int *msg,
1227 int ctb_x, int ctb_y,
1229 int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index, int index)
1231 /* here cu == mb, so we use mb address as the cu address */
1232 /* to fill the indirect cu by the vme out */
/* AVC intra mode -> HEVC intra mode remap tables (8x8 and 16x16 cases). */
1233 static int intra_mode_8x8_avc2hevc[9] = {26, 10, 1, 34, 18, 24, 13, 28, 8};
1234 static int intra_mode_16x16_avc2hevc[4] = {26, 10, 1, 34};
1235 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1236 unsigned char * cu_record_ptr = NULL;
1237 unsigned int * cu_msg = NULL;
/* Each CU record is 16 dwords (64 bytes); records are laid out per CTB. */
1238 int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1239 int mb_address_in_ctb = 0;
1240 int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
1243 int intraMbMode = 0;
1244 int cu_part_mode = 0;
1246 int inerpred_idc = 0;
1247 int intra_chroma_mode = 5;
/* Remap AVC chroma intra mode index to the HCP chroma mode encoding. */
1251 int chroma_mode_remap[4] = {5, 4, 3, 2};
/* Intra CU: mark every reference list entry as unused. */
1253 if (!is_inter) inerpred_idc = 0xff;
1255 intraMbMode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1257 intra_chroma_mode = (msg[3] & 0x3);
1258 intra_chroma_mode = chroma_mode_remap[intra_chroma_mode];
1259 if (intraMbMode == AVC_INTRA_16X16) {
1260 cu_part_mode = 0; //2Nx2N
/* 16x16: one luma mode replicated into all four record slots. */
1264 intraMode[0] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1265 intraMode[1] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1266 intraMode[2] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1267 intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1268 } else if (intraMbMode == AVC_INTRA_8X8) {
1269 cu_part_mode = 0; //2Nx2N
/* 8x8: pick the 4-bit mode nibble for this sub-block (index). */
1273 intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1274 intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1275 intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1276 intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1278 } else { // for 4x4 to use 8x8 replace
1279 cu_part_mode = 3; //NxN
/* 4x4: four distinct modes, one nibble each, within this sub-block. */
1283 intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 0) & 0xf];
1284 intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 4) & 0xf];
1285 intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 8) & 0xf];
1286 intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 12) & 0xf];
/* NOTE(review): assumes hcp_indirect_cu_object.bo has been mapped by the
 * caller (the slice programming loop does dri_bo_map before this). */
1290 cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
1291 /* get the mb info from the vme out */
1292 cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
/* Dword 0: prediction flags, QP, chroma mode, partition, CU size. */
1294 cu_msg[0] = (inerpred_idc << 24 | /* interpred_idc[3:0][1:0] */
1295 zero << 23 | /* reserved */
1296 qp << 16 | /* CU_qp */
1297 zero << 11 | /* reserved */
1298 intra_chroma_mode << 8 | /* intra_chroma_mode */
1299 zero << 7 | /* IPCM_enable , reserved for SKL*/
1300 cu_part_mode << 4 | /* cu_part_mode */
1301 zero << 3 | /* cu_transquant_bypass_flag */
1302 is_inter << 2 | /* cu_pred_mode :intra 1,inter 1*/
1303 cu_size /* cu_size */
/* Dword 1: the four luma intra modes. */
1305 cu_msg[1] = (zero << 30 | /* reserved */
1306 intraMode[3] << 24 | /* intra_mode */
1307 zero << 22 | /* reserved */
1308 intraMode[2] << 16 | /* intra_mode */
1309 zero << 14 | /* reserved */
1310 intraMode[1] << 8 | /* intra_mode */
1311 zero << 6 | /* reserved */
1312 intraMode[0] /* intra_mode */
1314 /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
/* Dwords 2-9: motion vectors, all zero for an intra CU. */
1315 cu_msg[2] = (zero << 16 | /* mvx_l0[1] */
1316 zero /* mvx_l0[0] */
1318 cu_msg[3] = (zero << 16 | /* mvx_l0[3] */
1319 zero /* mvx_l0[2] */
1321 cu_msg[4] = (zero << 16 | /* mvy_l0[1] */
1322 zero /* mvy_l0[0] */
1324 cu_msg[5] = (zero << 16 | /* mvy_l0[3] */
1325 zero /* mvy_l0[2] */
1328 cu_msg[6] = (zero << 16 | /* mvx_l1[1] */
1329 zero /* mvx_l1[0] */
1331 cu_msg[7] = (zero << 16 | /* mvx_l1[3] */
1332 zero /* mvx_l1[2] */
1334 cu_msg[8] = (zero << 16 | /* mvy_l1[1] */
1335 zero /* mvy_l1[0] */
1337 cu_msg[9] = (zero << 16 | /* mvy_l1[3] */
1338 zero /* mvy_l1[2] */
/* Dword 10: reference indices, all zero for intra. */
1341 cu_msg[10] = (zero << 28 | /* ref_idx_l1[3] */
1342 zero << 24 | /* ref_idx_l1[2] */
1343 zero << 20 | /* ref_idx_l1[1] */
1344 zero << 16 | /* ref_idx_l1[0] */
1345 zero << 12 | /* ref_idx_l0[3] */
1346 zero << 8 | /* ref_idx_l0[2] */
1347 zero << 4 | /* ref_idx_l0[1] */
1348 zero /* ref_idx_l0[0] */
/* Dwords 11-13: transform-unit layout and skip flags. */
1351 cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
1352 cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
1353 zero << 16 | /* reserved */
1354 zero /* tu_xform_Yskip[15:0] */
1356 cu_msg[13] = (zero << 16 | /* tu_xform_Vskip[15:0] */
1357 zero /* tu_xform_Uskip[15:0] */
/*
 * Fill one inter CU record in the HCP indirect-CU object from the VME
 * (AVC-style) output message.
 *
 * The VME kernel emits MVs per 4x4 sub-block; the code below gathers the
 * MVs for the decided AVC partition (16x16 / 16x8 / 8x16 / 8x8) into the
 * fixed 4-MV-per-list layout that the HCP CU record expects, then packs
 * the record dwords (prediction flags, MVs, ref indices, TU layout).
 *
 * @qp:            QP written into the CU record
 * @msg:           per-MB VME output dwords (already advanced past the
 *                 intra portion by the caller for inter MBs)
 * @index:         sub-block index used only for the 8x8 partition case
 */
1363 /* here 1 MB = 1CU = 16x16 */
1365 gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
1366 struct encode_state *encode_state,
1367 struct intel_encoder_context *encoder_context,
1368 int qp, unsigned int *msg,
1369 int ctb_x, int ctb_y,
1371 int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index, int index)
1373 /* here cu == mb, so we use mb address as the cu address */
1374 /* to fill the indirect cu by the vme out */
1375 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1376 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1377 unsigned char * cu_record_ptr = NULL;
1378 unsigned int * cu_msg = NULL;
/* Each CU record is 16 dwords (64 bytes); records are laid out per CTB. */
1379 int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1380 int mb_address_in_ctb = 0;
1381 int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
1383 int cu_part_mode = 0;
1384 int submb_pre_mode = 0;
1391 unsigned int *mv_ptr;
1393 inter_mode = (msg[0] & AVC_INTER_MODE_MASK);
1394 submb_pre_mode = (msg[1] & AVC_INTER_SUBMB_PRE_MODE_MASK) >> 16;
1395 #define MSG_MV_OFFSET 4
1396 mv_ptr = msg + MSG_MV_OFFSET;
1397 /* MV of VME output is based on 16 sub-blocks. So it is necessary
1398 * to convert them to be compatible with the format of AVC_PAK
1401 /* 0/2/4/6/8... : l0, 1/3/5/7...: l1 ; now it only support 16x16,16x8,8x16,8x8*/
/* 16x16: replicate the single MV pair into all four MV slots. */
1403 if (inter_mode == AVC_INTER_16X16) {
1404 mv_ptr[4] = mv_ptr[0];
1405 mv_ptr[5] = mv_ptr[1];
1406 mv_ptr[2] = mv_ptr[0];
1407 mv_ptr[3] = mv_ptr[1];
1408 mv_ptr[6] = mv_ptr[0];
1409 mv_ptr[7] = mv_ptr[1];
/* 8x16: left half from sub-block 0, right half from sub-block 4. */
1414 } else if (inter_mode == AVC_INTER_8X16) {
1415 mv_ptr[4] = mv_ptr[0];
1416 mv_ptr[5] = mv_ptr[1];
1417 mv_ptr[2] = mv_ptr[8];
1418 mv_ptr[3] = mv_ptr[9];
1419 mv_ptr[6] = mv_ptr[8];
1420 mv_ptr[7] = mv_ptr[9];
/* 16x8: top half from sub-block 0, bottom half from sub-block 8. */
1425 } else if (inter_mode == AVC_INTER_16X8) {
1426 mv_ptr[2] = mv_ptr[0];
1427 mv_ptr[3] = mv_ptr[1];
1428 mv_ptr[4] = mv_ptr[16];
1429 mv_ptr[5] = mv_ptr[17];
1430 mv_ptr[6] = mv_ptr[24];
1431 mv_ptr[7] = mv_ptr[25];
/* 8x8: take the MV pair of the 8x8 block selected by @index.
 * NOTE(review): mv_ptr[0..7] are overwritten while also being read via
 * index*8 -- safe only for index 0; verify callers for index > 0. */
1436 } else if (inter_mode == AVC_INTER_8X8) {
1437 mv_ptr[0] = mv_ptr[index * 8 + 0 ];
1438 mv_ptr[1] = mv_ptr[index * 8 + 1 ];
1439 mv_ptr[2] = mv_ptr[index * 8 + 0 ];
1440 mv_ptr[3] = mv_ptr[index * 8 + 1 ];
1441 mv_ptr[4] = mv_ptr[index * 8 + 0 ];
1442 mv_ptr[5] = mv_ptr[index * 8 + 1 ];
1443 mv_ptr[6] = mv_ptr[index * 8 + 0 ];
1444 mv_ptr[7] = mv_ptr[index * 8 + 1 ];
/* Fallback: treat any other partition like 16x16. */
1451 mv_ptr[4] = mv_ptr[0];
1452 mv_ptr[5] = mv_ptr[1];
1453 mv_ptr[2] = mv_ptr[0];
1454 mv_ptr[3] = mv_ptr[1];
1455 mv_ptr[6] = mv_ptr[0];
1456 mv_ptr[7] = mv_ptr[1];
/* NOTE(review): assumes hcp_indirect_cu_object.bo has been mapped by the
 * caller (the slice programming loop does dri_bo_map before this). */
1465 cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
1466 /* get the mb info from the vme out */
1467 cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
/* Dword 0: prediction flags, QP, partition, CU size. */
1469 cu_msg[0] = (submb_pre_mode << 24 | /* interpred_idc[3:0][1:0] */
1470 zero << 23 | /* reserved */
1471 qp << 16 | /* CU_qp */
1472 zero << 11 | /* reserved */
1473 5 << 8 | /* intra_chroma_mode */
1474 zero << 7 | /* IPCM_enable , reserved for SKL*/
1475 cu_part_mode << 4 | /* cu_part_mode */
1476 zero << 3 | /* cu_transquant_bypass_flag */
1477 is_inter << 2 | /* cu_pred_mode :intra 1,inter 1*/
1478 cu_size /* cu_size */
/* Dword 1: intra modes, unused for an inter CU. */
1480 cu_msg[1] = (zero << 30 | /* reserved */
1481 zero << 24 | /* intra_mode */
1482 zero << 22 | /* reserved */
1483 zero << 16 | /* intra_mode */
1484 zero << 14 | /* reserved */
1485 zero << 8 | /* intra_mode */
1486 zero << 6 | /* reserved */
1487 zero /* intra_mode */
1489 /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
/* Dwords 2-9: split the packed (y<<16|x) VME MVs into the record's
 * separate x/y halfword lanes for lists L0 and L1. */
1490 cu_msg[2] = ((mv_ptr[2] & 0xffff) << 16 | /* mvx_l0[1] */
1491 (mv_ptr[0] & 0xffff) /* mvx_l0[0] */
1493 cu_msg[3] = ((mv_ptr[6] & 0xffff) << 16 | /* mvx_l0[3] */
1494 (mv_ptr[4] & 0xffff) /* mvx_l0[2] */
1496 cu_msg[4] = ((mv_ptr[2] & 0xffff0000) | /* mvy_l0[1] */
1497 (mv_ptr[0] & 0xffff0000) >> 16 /* mvy_l0[0] */
1499 cu_msg[5] = ((mv_ptr[6] & 0xffff0000) | /* mvy_l0[3] */
1500 (mv_ptr[4] & 0xffff0000) >> 16 /* mvy_l0[2] */
1503 cu_msg[6] = ((mv_ptr[3] & 0xffff) << 16 | /* mvx_l1[1] */
1504 (mv_ptr[1] & 0xffff) /* mvx_l1[0] */
1506 cu_msg[7] = ((mv_ptr[7] & 0xffff) << 16 | /* mvx_l1[3] */
1507 (mv_ptr[5] & 0xffff) /* mvx_l1[2] */
1509 cu_msg[8] = ((mv_ptr[3] & 0xffff0000) | /* mvy_l1[1] */
1510 (mv_ptr[1] & 0xffff0000) >> 16 /* mvy_l1[0] */
1512 cu_msg[9] = ((mv_ptr[7] & 0xffff0000) | /* mvy_l1[3] */
1513 (mv_ptr[5] & 0xffff0000) >> 16 /* mvy_l1[2] */
/* Dword 10: 4-bit reference indices per list, unpacked from the VME
 * per-MB reference-index words. */
1516 cu_msg[10] = (((vme_context->ref_index_in_mb[1] >> 24) & 0xf) << 28 | /* ref_idx_l1[3] */
1517 ((vme_context->ref_index_in_mb[1] >> 16) & 0xf) << 24 | /* ref_idx_l1[2] */
1518 ((vme_context->ref_index_in_mb[1] >> 8) & 0xf) << 20 | /* ref_idx_l1[1] */
1519 ((vme_context->ref_index_in_mb[1] >> 0) & 0xf) << 16 | /* ref_idx_l1[0] */
1520 ((vme_context->ref_index_in_mb[0] >> 24) & 0xf) << 12 | /* ref_idx_l0[3] */
1521 ((vme_context->ref_index_in_mb[0] >> 16) & 0xf) << 8 | /* ref_idx_l0[2] */
1522 ((vme_context->ref_index_in_mb[0] >> 8) & 0xf) << 4 | /* ref_idx_l0[1] */
1523 ((vme_context->ref_index_in_mb[0] >> 0) & 0xf) /* ref_idx_l0[0] */
/* Dwords 11-13: transform-unit layout and skip flags. */
1526 cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
1527 cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
1528 zero << 16 | /* reserved */
1529 zero /* tu_xform_Yskip[15:0] */
1531 cu_msg[13] = (zero << 16 | /* tu_xform_Vskip[15:0] */
1532 zero /* tu_xform_Uskip[15:0] */
/* Precomputed split_coding_unit_flag bitmaps for HCP_PAK_OBJECT dword 1.
 * From usage below: bits 19:16 carry per-32x32 split flags and bit 20 the
 * 64x64 split flag.  NOTE(review): _8_8 is identical to _32_32 here --
 * presumably intentional (8x8 handled via the 16-19 bits at runtime), but
 * worth confirming against the HCP_PAK_OBJECT programming notes. */
1538 #define HEVC_SPLIT_CU_FLAG_64_64 ((0x1<<20)|(0xf<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1539 #define HEVC_SPLIT_CU_FLAG_32_32 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1540 #define HEVC_SPLIT_CU_FLAG_16_16 ((0x0<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1541 #define HEVC_SPLIT_CU_FLAG_8_8 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
/*
 * Insert the application's packed header data for one slice into the slice
 * batchbuffer, in order: all raw (non-slice-header) packed buffers first,
 * then the slice header -- either the app-supplied packed slice header or,
 * if none was given (slice_header_index == -1), one built by the driver
 * via build_hevc_slice_header().
 */
1545 intel_hevc_slice_insert_packed_data(VADriverContextP ctx,
1546 struct encode_state *encode_state,
1547 struct intel_encoder_context *encoder_context,
1549 struct intel_batchbuffer *slice_batch)
1551 int count, i, start_index;
1552 unsigned int length_in_bits;
1553 VAEncPackedHeaderParameterBuffer *param = NULL;
1554 unsigned int *header_data = NULL;
1555 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1556 int slice_header_index;
/* slice_header_index == 0 means the app supplied no packed slice header. */
1558 if (encode_state->slice_header_index[slice_index] == 0)
1559 slice_header_index = -1;
1561 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1563 count = encode_state->slice_rawdata_count[slice_index];
1564 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* Pass 1: emit every raw packed buffer attached to this slice. */
1566 for (i = 0; i < count; i++) {
1567 unsigned int skip_emul_byte_cnt;
1569 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1571 param = (VAEncPackedHeaderParameterBuffer *)
1572 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1574 /* skip the slice header packed data type as it is lastly inserted */
1575 if (param->type == VAEncPackedHeaderSlice)
1578 length_in_bits = param->bit_length;
1580 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1582 /* as the slice header is still required, the last header flag is set to
1585 mfc_context->insert_object(ctx,
1588 ALIGN(length_in_bits, 32) >> 5,
1589 length_in_bits & 0x1f,
1593 !param->has_emulation_bytes,
/* Pass 2a: no packed slice header supplied -- build one ourselves. */
1597 if (slice_header_index == -1) {
1598 unsigned char *slice_header = NULL;
1599 int slice_header_length_in_bits = 0;
1600 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1601 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1602 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1604 /* For the Normal HEVC */
1605 slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter,
1610 mfc_context->insert_object(ctx, encoder_context,
1611 (unsigned int *)slice_header,
1612 ALIGN(slice_header_length_in_bits, 32) >> 5,
1613 slice_header_length_in_bits & 0x1f,
1614 5, /* first 6 bytes are start code + nal unit type */
1615 1, 0, 1, slice_batch);
/* Pass 2b: emit the app-supplied packed slice header last. */
1618 unsigned int skip_emul_byte_cnt;
1620 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1622 param = (VAEncPackedHeaderParameterBuffer *)
1623 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1624 length_in_bits = param->bit_length;
1626 /* as the slice header is the last header data for one slice,
1627 * the last header flag is set to one.
1629 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1631 mfc_context->insert_object(ctx,
1634 ALIGN(length_in_bits, 32) >> 5,
1635 length_in_bits & 0x1f,
1639 !param->has_emulation_bytes,
/*
 * Program all PAK commands for one HEVC slice (software PAK path):
 *  1. derive CTB/CU geometry and the slice QP (CBR adjusts it via BRC),
 *  2. emit slice state, pipeline headers (first slice only) and packed
 *     header data,
 *  3. walk every CTB of the slice, translating the per-MB VME output into
 *     indirect CU records and one HCP_PAK_OBJECT per CTB,
 *  4. append tail/padding data (longer tail on the last slice).
 */
1647 gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
1648 struct encode_state *encode_state,
1649 struct intel_encoder_context *encoder_context,
1651 struct intel_batchbuffer *slice_batch)
1653 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1654 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1655 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1656 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1657 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1658 int qp_slice = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1659 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1660 //unsigned char *slice_header = NULL; // for future use
1661 //int slice_header_length_in_bits = 0;
1662 unsigned int tail_data[] = { 0x0, 0x0 };
1663 int slice_type = pSliceParameter->slice_type;
/* CTB/CU geometry derived from the SPS. */
1665 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1666 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1667 int ctb_size = 1 << log2_ctb_size;
1668 int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
1669 int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
1670 int last_slice = (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice) == (width_in_ctb * height_in_ctb);
1671 int ctb_width_in_mb = (ctb_size + 15) / 16;
1672 int i_ctb, ctb_x, ctb_y;
1673 unsigned int split_coding_unit_flag = 0;
1674 int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
/* Non-CTB-aligned picture edges need per-edge CU trimming below. */
1675 int row_pad_flag = (pSequenceParameter->pic_height_in_luma_samples % ctb_size) > 0 ? 1 : 0;
1676 int col_pad_flag = (pSequenceParameter->pic_width_in_luma_samples % ctb_size) > 0 ? 1 : 0;
1678 int is_intra = (slice_type == HEVC_SLICE_I);
1679 unsigned int *msg = NULL;
1680 unsigned char *msg_ptr = NULL;
1681 int macroblock_address = 0;
1682 int num_cu_record = 64;
1684 int tmp_mb_mode = 0;
1685 int mb_x = 0, mb_y = 0;
1688 int inter_rdo, intra_rdo;
1690 int drop_cu_row_in_last_mb = 0;
1691 int drop_cu_column_in_last_mb = 0;
/* CU-record slots per CTB scale with CTB size (16x16 CUs per CTB * 4). */
1693 if (log2_ctb_size == 5) num_cu_record = 16;
1694 else if (log2_ctb_size == 4) num_cu_record = 4;
1695 else if (log2_ctb_size == 6) num_cu_record = 64;
/* CBR: override the slice QP with the BRC-tracked QP for this slice type;
 * B slices reuse the P QP when the GOP degenerates (ip_period handling). */
1698 if (rate_control_mode == VA_RC_CBR) {
1699 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1700 if (slice_type == HEVC_SLICE_B) {
1701 if (pSequenceParameter->ip_period == 1) {
1702 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1704 } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
1705 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
/* Keep the app slice param consistent with the BRC-chosen QP when the
 * driver builds the slice header itself. */
1708 if (encode_state->slice_header_index[slice_index] == 0) {
1709 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1713 /* only support for 8-bit pixel bit-depth */
1714 assert(pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 <= 2);
1715 assert(pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 <= 2);
1716 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1717 assert(qp >= 0 && qp < 52);
1720 gen9_hcpe_hevc_slice_state(ctx,
1723 encode_state, encoder_context,
/* Picture-level headers (VPS/SPS/PPS etc.) go in once, before slice 0. */
1726 if (slice_index == 0)
1727 intel_hcpe_hevc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1729 intel_hevc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1732 slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header, slice_index);
1733 int skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)slice_header, slice_header_length_in_bits);
1735 mfc_context->insert_object(ctx, encoder_context,
1736 (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1738 1, 0, 1, slice_batch);
/* Default split bitmap for a full (unpadded) CTB of this size. */
1745 split_coding_unit_flag = (ctb_width_in_mb == 4) ? HEVC_SPLIT_CU_FLAG_64_64 : ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
/* Map VME output (read) and the indirect CU object (write) for the walk. */
1747 dri_bo_map(vme_context->vme_output.bo , 1);
1748 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1749 dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
/* Per-CTB walk over the slice's CTU range. */
1751 for (i_ctb = pSliceParameter->slice_segment_address; i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
1752 int last_ctb = (i_ctb == (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice - 1));
1753 int ctb_height_in_mb_internal = ctb_width_in_mb;
1754 int ctb_width_in_mb_internal = ctb_width_in_mb;
1755 int max_cu_num_in_mb = 4;
1757 ctb_x = i_ctb % width_in_ctb;
1758 ctb_y = i_ctb / width_in_ctb;
1760 drop_cu_row_in_last_mb = 0;
1761 drop_cu_column_in_last_mb = 0;
/* Bottom-edge CTB of a non-aligned picture: shrink the MB walk and, for
 * 8x8 CUs, drop the CU rows that fall outside the picture. */
1763 if (ctb_y == (height_in_ctb - 1) && row_pad_flag) {
1764 ctb_height_in_mb_internal = (pSequenceParameter->pic_height_in_luma_samples - (ctb_y * ctb_size) + 15) / 16;
1766 if ((log2_cu_size == 3) && (pSequenceParameter->pic_height_in_luma_samples % 16))
1767 drop_cu_row_in_last_mb = (16 - (pSequenceParameter->pic_height_in_luma_samples % 16)) >> log2_cu_size;
/* Same trimming for the right edge. */
1770 if (ctb_x == (width_in_ctb - 1) && col_pad_flag) {
1771 ctb_width_in_mb_internal = (pSequenceParameter->pic_width_in_luma_samples - (ctb_x * ctb_size) + 15) / 16;
1773 if ((log2_cu_size == 3) && (pSequenceParameter->pic_width_in_luma_samples % 16))
1774 drop_cu_column_in_last_mb = (16 - (pSequenceParameter->pic_width_in_luma_samples % 16)) >> log2_cu_size;
1779 macroblock_address = ctb_y * width_in_mbs * ctb_width_in_mb + ctb_x * ctb_width_in_mb;
1780 split_coding_unit_flag = ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
/* Walk the MBs inside this CTB; each MB yields 1-4 CU records. */
1785 for (mb_y = 0; mb_y < ctb_height_in_mb_internal; mb_y++) {
1786 mb_addr = macroblock_address + mb_y * width_in_mbs ;
1787 for (mb_x = 0; mb_x < ctb_width_in_mb_internal; mb_x++) {
1788 max_cu_num_in_mb = 4;
1789 if (drop_cu_row_in_last_mb && (mb_y == ctb_height_in_mb_internal - 1))
1790 max_cu_num_in_mb /= 2;
1792 if (drop_cu_column_in_last_mb && (mb_x == ctb_width_in_mb_internal - 1))
1793 max_cu_num_in_mb /= 2;
1795 /* get the mb info from the vme out */
1796 msg = (unsigned int *)(msg_ptr + mb_addr * vme_context->vme_output.size_block);
1798 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1799 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1800 /*fill to indirect cu */
/* Intra wins on RD cost (or the slice forbids inter). */
1802 if (is_intra || intra_rdo < inter_rdo) {
1804 tmp_mb_mode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1805 if (max_cu_num_in_mb < 4) {
/* Trimmed MB: force 8x8 so only the surviving CUs are emitted.
 * NOTE(review): `!AVC_INTRA_MODE_MASK` evaluates to 0, so this clears
 * msg[0] rather than just the mode bits -- looks like it was meant to
 * be `~AVC_INTRA_MODE_MASK`; same pattern in the inter branch below. */
1806 if (tmp_mb_mode == AVC_INTRA_16X16) {
1807 msg[0] = (msg[0] & !AVC_INTRA_MODE_MASK) | (AVC_INTRA_8X8 << 4);
1808 tmp_mb_mode = AVC_INTRA_8X8;
1811 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1812 if (--max_cu_num_in_mb > 0)
1813 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 2);
/* Record the sub-split of this MB in the CTB's split bitmap. */
1815 if (ctb_width_in_mb == 2)
1816 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1817 else if (ctb_width_in_mb == 1)
1818 split_coding_unit_flag |= 0x1 << 20;
1819 } else if (tmp_mb_mode == AVC_INTRA_16X16) {
1820 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1821 } else { // for 4x4 to use 8x8 replace
1822 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1823 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 1);
1824 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 2);
1825 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 3);
1826 if (ctb_width_in_mb == 2)
1827 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1828 else if (ctb_width_in_mb == 1)
1829 split_coding_unit_flag |= 0x1 << 20;
/* Inter wins: advance msg to the inter portion of the VME message. */
1832 msg += AVC_INTER_MSG_OFFSET;
1834 tmp_mb_mode = msg[0] & AVC_INTER_MODE_MASK;
1835 if (max_cu_num_in_mb < 4) {
1836 if (tmp_mb_mode != AVC_INTER_8X8) {
1837 msg[0] = (msg[0] & !AVC_INTER_MODE_MASK) | AVC_INTER_8X8;
1838 tmp_mb_mode = AVC_INTER_8X8;
1840 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1841 if (--max_cu_num_in_mb > 0)
1842 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 1);
1844 if (ctb_width_in_mb == 2)
1845 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1846 else if (ctb_width_in_mb == 1)
1847 split_coding_unit_flag |= 0x1 << 20;
1848 } else if (tmp_mb_mode == AVC_INTER_8X8) {
1849 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1850 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 1);
1851 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 2);
1852 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 3);
1853 if (ctb_width_in_mb == 2)
1854 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1855 else if (ctb_width_in_mb == 1)
1856 split_coding_unit_flag |= 0x1 << 20;
1858 } else if (tmp_mb_mode == AVC_INTER_16X16 ||
1859 tmp_mb_mode == AVC_INTER_8X16 ||
1860 tmp_mb_mode == AVC_INTER_16X8) {
1861 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1868 cu_count = cu_index;
1869 // PAK object fill accordingly.
1870 gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, encoder_context, cu_count, split_coding_unit_flag, slice_batch);
1873 dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
1874 dri_bo_unmap(vme_context->vme_output.bo);
/* Tail/padding data: longer tail on the last slice of the picture. */
1877 mfc_context->insert_object(ctx, encoder_context,
1879 2, 1, 1, 0, slice_batch);
1881 mfc_context->insert_object(ctx, encoder_context,
1883 1, 1, 1, 0, slice_batch);
/*
 * Build all slice-level HCP commands into the software-managed auxiliary
 * batchbuffer, terminate it with MI_BATCH_BUFFER_END, and detach it from the
 * context.  The backing dri_bo is referenced before the batchbuffer wrapper
 * is freed so the caller owns the buffer object afterwards.
 * NOTE(review): the return-type line and the trailing "return batch_bo;" fall
 * outside this excerpt; presumably it returns the dri_bo * — confirm against
 * the full file.
 */
1888 gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,
1889 struct encode_state *encode_state,
1890 struct intel_encoder_context *encoder_context)
1892 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1893 struct intel_batchbuffer *batch;
/* Reuse the aux batchbuffer prepared earlier on the context. */
1897 batch = mfc_context->aux_batchbuffer;
1898 batch_bo = batch->buffer;
/* Emit the commands for every slice of this frame. */
1900 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1901 gen9_hcpe_hevc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
/* Pad to an 8-byte boundary, then terminate the second-level batch. */
1904 intel_batchbuffer_align(batch, 8);
1906 BEGIN_BCS_BATCH(batch, 2);
1907 OUT_BCS_BATCH(batch, 0);
1908 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1909 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive past intel_batchbuffer_free(), and drop the
 * context's pointer so the buffer is not reused next frame. */
1911 dri_bo_reference(batch_bo);
1912 intel_batchbuffer_free(batch);
1913 mfc_context->aux_batchbuffer = NULL;
/*
 * Program the whole BCS (video command streamer) pipeline for one frame:
 * build the slice-level second-level batch (software or hardware variant,
 * chosen at compile time), then emit picture-level state and chain to the
 * slice batch via MI_BATCH_BUFFER_START.
 */
1923 gen9_hcpe_hevc_pipeline_programing(VADriverContextP ctx,
1924 struct encode_state *encode_state,
1925 struct intel_encoder_context *encoder_context)
1927 struct i965_driver_data *i965 = i965_driver_data(ctx);
1928 struct intel_batchbuffer *batch = encoder_context->base.batch;
1929 dri_bo *slice_batch_bo;
/* Compile-time selection of the slice-batch builder; the #else between
 * these two calls is outside this excerpt. */
1931 #ifdef HCP_SOFTWARE_SKYLAKE
1932 slice_batch_bo = gen9_hcpe_hevc_software_batchbuffer(ctx, encode_state, encoder_context);
1934 slice_batch_bo = gen9_hcpe_hevc_hardware_batchbuffer(ctx, encode_state, encoder_context);
/* Route to BSD ring 0 explicitly when the platform has two BSD rings. */
1938 if (i965->intel.has_bsd2)
1939 intel_batchbuffer_start_atomic_bcs_override(batch, 0x4000, BSD_RING0);
1941 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1942 intel_batchbuffer_emit_mi_flush(batch);
1944 // picture level programing
1945 gen9_hcpe_hevc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain into the second-level slice batch (relocation emitted between the
 * excerpted lines). */
1947 BEGIN_BCS_BATCH(batch, 3);
1948 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1949 OUT_BCS_RELOC64(batch,
1951 I915_GEM_DOMAIN_COMMAND, 0,
1953 ADVANCE_BCS_BATCH(batch);
1956 intel_batchbuffer_end_atomic(batch);
/* The reference taken by the batchbuffer builder is released here. */
1958 dri_bo_unreference(slice_batch_bo);
/*
 * Insert the application-supplied packed headers (VPS, SPS, PPS, SEI) into
 * the slice batch, in that order.  Each section follows the same pattern:
 * look up the packed-header slot, read the bit length from the parameter
 * buffer, compute how many leading bytes must skip emulation-prevention
 * processing, and hand the raw bits to mfc_context->insert_object().
 * Sections are skipped when the app did not provide that header.
 */
1961 void intel_hcpe_hevc_pipeline_header_programing(VADriverContextP ctx,
1962 struct encode_state *encode_state,
1963 struct intel_encoder_context *encoder_context,
1964 struct intel_batchbuffer *slice_batch)
1966 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1967 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS);
1968 unsigned int skip_emul_byte_cnt;
/* --- VPS --- */
1970 if (encode_state->packed_header_data[idx]) {
1971 VAEncPackedHeaderParameterBuffer *param = NULL;
1972 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1973 unsigned int length_in_bits;
1975 assert(encode_state->packed_header_param[idx]);
1976 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1977 length_in_bits = param->bit_length;
1979 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1980 mfc_context->insert_object(ctx,
/* length is passed as whole DWORDs plus remaining bits. */
1983 ALIGN(length_in_bits, 32) >> 5,
1984 length_in_bits & 0x1f,
/* emulation-prevention bytes are inserted by hardware only when the
 * app has not already inserted them. */
1988 !param->has_emulation_bytes,
/* --- SPS: stored in the slot immediately after VPS. --- */
1992 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS) + 1; // index to SPS
1994 if (encode_state->packed_header_data[idx]) {
1995 VAEncPackedHeaderParameterBuffer *param = NULL;
1996 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1997 unsigned int length_in_bits;
1999 assert(encode_state->packed_header_param[idx]);
2000 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2001 length_in_bits = param->bit_length;
2003 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2004 mfc_context->insert_object(ctx,
2007 ALIGN(length_in_bits, 32) >> 5,
2008 length_in_bits & 0x1f,
2012 !param->has_emulation_bytes,
/* --- PPS --- */
2016 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_PPS);
2018 if (encode_state->packed_header_data[idx]) {
2019 VAEncPackedHeaderParameterBuffer *param = NULL;
2020 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2021 unsigned int length_in_bits;
2023 assert(encode_state->packed_header_param[idx]);
2024 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2025 length_in_bits = param->bit_length;
2027 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2029 mfc_context->insert_object(ctx,
2032 ALIGN(length_in_bits, 32) >> 5,
2033 length_in_bits & 0x1f,
2037 !param->has_emulation_bytes,
/* --- SEI --- */
2041 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_SEI);
2043 if (encode_state->packed_header_data[idx]) {
2044 VAEncPackedHeaderParameterBuffer *param = NULL;
2045 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2046 unsigned int length_in_bits;
2048 assert(encode_state->packed_header_param[idx]);
2049 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2050 length_in_bits = param->bit_length;
2052 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2053 mfc_context->insert_object(ctx,
2056 ALIGN(length_in_bits, 32) >> 5,
2057 length_in_bits & 0x1f,
2061 !param->has_emulation_bytes,
/*
 * Bind all per-frame input/output buffer objects onto the HCP context:
 * the reconstructed surface and its MV temporal buffer, the reference
 * surfaces and their MV buffers, the uncompressed source picture, and the
 * coded (PAK-BSE) output buffer whose header segment is reset here.
 * Each bo stored on the context takes an extra dri_bo reference; they are
 * released in gen9_hcpe_context_destroy() or on the next prepare.
 */
2066 VAStatus intel_hcpe_hevc_prepare(VADriverContextP ctx,
2067 struct encode_state *encode_state,
2068 struct intel_encoder_context *encoder_context)
2070 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2071 struct object_surface *obj_surface;
2072 struct object_buffer *obj_buffer;
2073 GenHevcSurface *hevc_encoder_surface;
2075 VAStatus vaStatus = VA_STATUS_SUCCESS;
2077 struct i965_coded_buffer_segment *coded_buffer_segment;
2079 /*Setup all the input&output object*/
2081 /* Setup current frame and current direct mv buffer*/
2082 obj_surface = encode_state->reconstructed_object;
2084 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2085 assert(hevc_encoder_surface);
/* The last collocated-MV slot is reserved for the current frame. */
2087 if (hevc_encoder_surface) {
2088 hevc_encoder_surface->has_p010_to_nv12_done = 0;
2089 hevc_encoder_surface->base.frame_store_id = -1;
2090 mfc_context->current_collocated_mv_temporal_buffer[NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS - 1].bo = hevc_encoder_surface->motion_vector_temporal_bo;
2091 dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
/* width/height are the display size; w_pitch/h_pitch the allocated size. */
2094 mfc_context->surface_state.width = obj_surface->orig_width;
2095 mfc_context->surface_state.height = obj_surface->orig_height;
2096 mfc_context->surface_state.w_pitch = obj_surface->width;
2097 mfc_context->surface_state.h_pitch = obj_surface->height;
2099 /* Setup reference frames and direct mv buffers*/
2100 for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2101 obj_surface = encode_state->reference_objects[i];
2103 if (obj_surface && obj_surface->bo) {
2104 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
2105 dri_bo_reference(obj_surface->bo);
2107 /* Check MV temporal buffer */
2108 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2109 assert(hevc_encoder_surface);
2111 if (hevc_encoder_surface) {
2112 hevc_encoder_surface->base.frame_store_id = -1;
2113 /* Setup MV temporal buffer */
2114 mfc_context->current_collocated_mv_temporal_buffer[i].bo = hevc_encoder_surface->motion_vector_temporal_bo;
2115 dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
/* Input YUV picture. */
2123 mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
2124 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Coded buffer: bitstream starts after the driver's header segment; the
 * end offset keeps a 4K guard band at the tail of the buffer. */
2126 obj_buffer = encode_state->coded_buf_object;
2127 bo = obj_buffer->buffer_store->bo;
2128 mfc_context->hcp_indirect_pak_bse_object.bo = bo;
2129 mfc_context->hcp_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2130 mfc_context->hcp_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2131 dri_bo_reference(mfc_context->hcp_indirect_pak_bse_object.bo);
/* Reset the coded-buffer bookkeeping header (bo mapped between the
 * excerpted lines). */
2134 coded_buffer_segment = (struct i965_coded_buffer_segment *)(bo->virtual);
2135 coded_buffer_segment->mapped = 0;
2136 coded_buffer_segment->codec = encoder_context->codec;
2142 /* HEVC BRC related */
/*
 * Seed the per-slice-type (I/P/B) bit-rate-control contexts from the
 * sequence parameters and the negotiated bitrate/framerate: target sizes
 * per CTB ("mb" here means CTB) and per frame, a default QP of 26, and the
 * grow/shrink/correction tables used by the PAK rate-control hardware.
 * Heuristic: intra CTBs are budgeted 5x the inter CTB size.
 */
2145 intel_hcpe_bit_rate_control_context_init(struct encode_state *encode_state,
2146 struct intel_encoder_context *encoder_context)
2148 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2149 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
/* ctb_size is computed from the SPS log2 fields on the lines missing
 * from this excerpt. */
2151 int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2152 int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2154 double fps = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
2155 double bitrate = encoder_context->brc.bits_per_second[0];
/* Average bits per CTB; "+ 4.0" damps the budget at low framerates. */
2156 int inter_mb_size = bitrate * 1.0 / (fps + 4.0) / width_in_mbs / height_in_mbs;
2157 int intra_mb_size = inter_mb_size * 5.0;
2160 mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_mb_size = intra_mb_size;
2161 mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
2162 mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_mb_size = inter_mb_size;
2163 mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
2164 mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_mb_size = inter_mb_size;
2165 mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
/* Identical hardware RC knobs for all three slice types. */
2167 for (i = 0 ; i < 3; i++) {
2168 mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
2169 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
2170 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
2171 mfc_context->bit_rate_control_context[i].GrowInit = 6;
2172 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
2173 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
2174 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
2176 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
2177 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
2178 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
2179 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
2180 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
2181 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/* Per-CTB targets in 16-byte words, with a 50% overshoot ceiling. */
2184 mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord = (intra_mb_size + 16) / 16;
2185 mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord = (inter_mb_size + 16) / 16;
2186 mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord = (inter_mb_size + 16) / 16;
2188 mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord * 1.5;
2189 mfc_context->bit_rate_control_context[HEVC_SLICE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord * 1.5;
2190 mfc_context->bit_rate_control_context[HEVC_SLICE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord * 1.5;
/*
 * Initialize the software BRC state: per-GOP frame-type counts, per-type
 * target frame sizes weighted by BRC_PWEIGHT/BRC_BWEIGHT, the HRD buffer
 * model (size and initial fullness, from app-supplied HRD params or a
 * bitrate-derived default), and an initial QP interpolated between the
 * estimated frame sizes at QP 1 and QP 51.
 */
2193 static void intel_hcpe_brc_init(struct encode_state *encode_state,
2194 struct intel_encoder_context* encoder_context)
2196 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2197 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2199 double bitrate = (double)encoder_context->brc.bits_per_second[0];
2200 double framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
2201 int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
2202 int intra_period = pSequenceParameter->intra_period;
2203 int ip_period = pSequenceParameter->ip_period;
/* Rough frame-size estimates for 4:2:0 content at QP 1 and QP 51
 * (0.1 resp. 0.001 bits per sample component). */
2204 double qp1_size = 0.1 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
2205 double qp51_size = 0.001 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
2210 double buffer_size = 0;
/* Scale the size estimates up for >8-bit content; bpp is set on the
 * lines missing from this excerpt. */
2213 if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) ||
2214 (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2217 qp1_size = qp1_size * bpp;
2218 qp51_size = qp51_size * bpp;
/* Derive P/B counts per GOP from intra_period and ip_period. */
2220 if (pSequenceParameter->ip_period) {
2221 pnum = (intra_period + ip_period - 1) / ip_period - 1;
2222 bnum = intra_period - inum - pnum;
2225 mfc_context->brc.mode = encoder_context->rate_control_mode;
/* Distribute the GOP bit budget across frame types by weight. */
2227 mfc_context->brc.target_frame_size[HEVC_SLICE_I] = (int)((double)((bitrate * intra_period) / framerate) /
2228 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
2229 mfc_context->brc.target_frame_size[HEVC_SLICE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
2230 mfc_context->brc.target_frame_size[HEVC_SLICE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
2232 mfc_context->brc.gop_nums[HEVC_SLICE_I] = inum;
2233 mfc_context->brc.gop_nums[HEVC_SLICE_P] = pnum;
2234 mfc_context->brc.gop_nums[HEVC_SLICE_B] = bnum;
2236 bpf = mfc_context->brc.bits_per_frame = bitrate / framerate;
/* HRD model: use app-provided buffer size/fullness when available,
 * otherwise derive from bitrate via ratio/ratio_min/ratio_max (declared
 * on lines missing from this excerpt); fullness defaults to half full. */
2238 if (!encoder_context->brc.hrd_buffer_size) {
2239 mfc_context->hrd.buffer_size = bitrate * ratio;
2240 mfc_context->hrd.current_buffer_fullness =
2241 (double)(bitrate * ratio / 2 < mfc_context->hrd.buffer_size) ?
2242 bitrate * ratio / 2 : mfc_context->hrd.buffer_size / 2.;
2244 buffer_size = (double)encoder_context->brc.hrd_buffer_size;
2245 if (buffer_size < bitrate * ratio_min) {
2246 buffer_size = bitrate * ratio_min;
2247 } else if (buffer_size > bitrate * ratio_max) {
2248 buffer_size = bitrate * ratio_max ;
2250 mfc_context->hrd.buffer_size = buffer_size;
2251 if (encoder_context->brc.hrd_initial_buffer_fullness) {
2252 mfc_context->hrd.current_buffer_fullness =
2253 (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
2254 encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
2256 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size / 2.;
2261 mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size / 2.;
2262 mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size / qp1_size;
2263 mfc_context->hrd.violation_noted = 0;
/* Pick an initial P-slice QP by linear interpolation between the QP=51
 * and QP=1 size estimates, then clone it to I/B and clamp. */
2265 if ((bpf > qp51_size) && (bpf < qp1_size)) {
2266 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
2267 } else if (bpf >= qp1_size)
2268 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 1;
2269 else if (bpf <= qp51_size)
2270 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51;
2272 mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2273 mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
2275 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 36);
2276 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 40);
2277 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 45);
/*
 * Advance the HRD (hypothetical reference decoder) buffer model by one
 * frame: drain frame_bits, refill with bits_per_frame, and report
 * BRC_UNDERFLOW / BRC_OVERFLOW / BRC_NO_HRD_VIOLATION.  On a violation the
 * previous fullness is restored so the caller can retry the frame at a
 * different QP.  A buffer_size of 0 disables the checks.
 */
2280 int intel_hcpe_update_hrd(struct encode_state *encode_state,
2281 struct gen9_hcpe_context *mfc_context,
2284 double prev_bf = mfc_context->hrd.current_buffer_fullness;
2286 mfc_context->hrd.current_buffer_fullness -= frame_bits;
2288 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
2289 mfc_context->hrd.current_buffer_fullness = prev_bf;
2290 return BRC_UNDERFLOW;
2293 mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
2294 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
/* VBR tolerates overshoot by saturating at the buffer size; CBR
 * (handled on the lines missing here) rolls back and reports overflow. */
2295 if (mfc_context->brc.mode == VA_RC_VBR)
2296 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
2298 mfc_context->hrd.current_buffer_fullness = prev_bf;
2299 return BRC_OVERFLOW;
2302 return BRC_NO_HRD_VIOLATION;
/*
 * Post-encode BRC step: given the actual bits produced for this frame,
 * predict the next QP for the current slice type (scaled by the ratio of
 * target to predicted frame size, rounded with an error accumulator, and
 * rate-limited to +/-BRC_QP_MAX_CHANGE), apply an HRD-fullness-based delta,
 * then either propagate consistent QPs to the other slice types (no HRD
 * violation) or force the QP up/down one step (under/overflow).  Returns a
 * gen6_brc_status so the caller knows whether to re-encode the frame.
 */
2305 int intel_hcpe_brc_postpack(struct encode_state *encode_state,
2306 struct gen9_hcpe_context *mfc_context,
2309 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
2310 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2311 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2312 int slicetype = pSliceParameter->slice_type;
2313 int qpi = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
2314 int qpp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2315 int qpb = mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY;
2316 int qp; // quantizer of previously encoded slice of current type
2317 int qpn; // predicted quantizer for next frame of current type in integer format
2318 double qpf; // predicted quantizer for next frame of current type in float format
2319 double delta_qp; // QP correction
2320 int target_frame_size, frame_size_next;
2322 * x - how far we are from HRD buffer borders
2323 * y - how far we are from target HRD buffer fullness
2326 double frame_size_alpha;
/* B slices that act as references in low-delay GOPs are budgeted as P. */
2328 if (slicetype == HEVC_SLICE_B) {
2329 if (pSequenceParameter->ip_period == 1) {
2330 slicetype = HEVC_SLICE_P;
2331 } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
2332 slicetype = HEVC_SLICE_P;
2336 qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
/* Smooth the per-type size estimate; alpha grows with GOP length but is
 * capped, and is disabled entirely for tiny HRD buffers. */
2338 target_frame_size = mfc_context->brc.target_frame_size[slicetype];
2339 if (mfc_context->hrd.buffer_capacity < 5)
2340 frame_size_alpha = 0;
2342 frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
2343 if (frame_size_alpha > 30) frame_size_alpha = 30;
2344 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
2345 (double)(frame_size_alpha + 1.);
2347 /* frame_size_next: avoiding negative number and too small value */
2348 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
2349 frame_size_next = (int)((double)target_frame_size * 0.25);
/* First-order model: QP scales inversely with predicted frame size. */
2351 qpf = (double)qp * target_frame_size / frame_size_next;
2352 qpn = (int)(qpf + 0.5);
2355 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
2356 mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
2357 if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
2359 mfc_context->brc.qpf_rounding_accumulator = 0.;
2360 } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
2362 mfc_context->brc.qpf_rounding_accumulator = 0.;
2365 /* making sure that QP is not changing too fast */
2366 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
2367 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
2368 /* making sure that with QP predictions we did do not leave QPs range */
2369 BRC_CLIP(qpn, 1, 51);
2371 /* checking wthether HRD compliance is still met */
2372 sts = intel_hcpe_update_hrd(encode_state, mfc_context, frame_bits);
2374 /* calculating QP delta as some function*/
/* x in [-1,1]: signed distance from target fullness, normalized toward
 * whichever buffer border we are approaching; y: bits of headroom. */
2375 x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
2377 x /= mfc_context->hrd.target_buffer_fullness;
2378 y = mfc_context->hrd.current_buffer_fullness;
2380 x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
2381 y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
2383 if (y < 0.01) y = 0.01;
2385 else if (x < -1) x = -1;
/* Sinusoidal correction, damped as headroom (y) shrinks to zero. */
2387 delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
2388 qpn = (int)(qpn + delta_qp + 0.5);
2390 /* making sure that with QP predictions we did do not leave QPs range */
2391 BRC_CLIP(qpn, 1, 51);
2393 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
2394 /* correcting QPs of slices of other types */
/* Keep the I/P/B QPs within the conventional offsets of each other
 * (BRC_I_P_QP_DIFF / BRC_P_B_QP_DIFF), nudging by half/quarter steps. */
2395 if (slicetype == HEVC_SLICE_P) {
2396 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
2397 mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
2398 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
2399 mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
2400 } else if (slicetype == HEVC_SLICE_I) {
2401 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
2402 mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
2403 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
2404 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
2405 } else { // HEVC_SLICE_B
2406 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
2407 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
2408 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
2409 mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
2411 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51);
2412 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51);
2413 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51);
2414 } else if (sts == BRC_UNDERFLOW) { // underflow
/* Frame too big: raise QP by at least one; if already at max QP the
 * violation is unrepairable. */
2415 if (qpn <= qp) qpn = qp + 1;
2418 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
2420 } else if (sts == BRC_OVERFLOW) {
2421 if (qpn >= qp) qpn = qp - 1;
2422 if (qpn < 1) { // < 0 (?) overflow with minQP
2424 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
2428 mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
/*
 * Initialize the VUI HRD timing fields used for SEI buffering-period /
 * picture-timing messages.  Only CBR is populated: bit rate and CPB size
 * are stored in 1024-unit scale, the CPB is sized at 8x the per-second
 * bit budget, and the initial removal delay corresponds to a half-full
 * CPB expressed in a 90 kHz clock.
 */
2433 static void intel_hcpe_hrd_context_init(struct encode_state *encode_state,
2434 struct intel_encoder_context *encoder_context)
2436 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2437 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2438 unsigned int target_bit_rate = encoder_context->brc.bits_per_second[0];
2440 // current we only support CBR mode.
2441 if (rate_control_mode == VA_RC_CBR) {
2442 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
2443 mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
/* half of CPB size, converted back to bits and scaled to 90 kHz ticks */
2444 mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
2445 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
2446 mfc_context->vui_hrd.i_frame_number = 0;
2448 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
2449 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
2450 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/* Advance the HRD frame counter after a frame is accepted (no HRD
 * violation); used e.g. to decide which B frames count as P in BRC. */
2456 intel_hcpe_hrd_context_update(struct encode_state *encode_state,
2457 struct gen9_hcpe_context *mfc_context)
2459 mfc_context->vui_hrd.i_frame_number++;
/*
 * Sanity-check the slice layout of the frame: sum num_ctu_in_slice over
 * all slices and compare against the CTB count implied by the SPS
 * dimensions.  The visible code returns a pass/fail indicator based on
 * that comparison (return statements fall on lines missing from this
 * excerpt); despite the name, no field/interlace flags are inspected here.
 */
2462 int intel_hcpe_interlace_check(VADriverContextP ctx,
2463 struct encode_state *encode_state,
2464 struct intel_encoder_context *encoder_context)
2466 VAEncSliceParameterBufferHEVC *pSliceParameter;
2467 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2468 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
2469 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
2470 int ctb_size = 1 << log2_ctb_size;
2471 int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2472 int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2476 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2477 pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
2478 ctbCount += pSliceParameter->num_ctu_in_slice;
2481 if (ctbCount == (width_in_ctb * height_in_ctb))
/*
 * Per-frame BRC entry point called before encoding.  Under CBR it
 * (re)initializes the rate-control and HRD contexts on first use
 * (detected via zeroed MaxSizeInWord / i_cpb_size_value) or whenever the
 * application requested a BRC reset via encoder_context->brc.need_reset.
 */
2487 void intel_hcpe_brc_prepare(struct encode_state *encode_state,
2488 struct intel_encoder_context *encoder_context)
2490 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2491 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2493 if (rate_control_mode == VA_RC_CBR) {
2495 assert(encoder_context->codec != CODEC_MPEG2);
2497 brc_updated = encoder_context->brc.need_reset;
2499 /*Programing bit rate control */
2500 if ((mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord == 0) ||
2502 intel_hcpe_bit_rate_control_context_init(encode_state, encoder_context);
2503 intel_hcpe_brc_init(encode_state, encoder_context);
2506 /*Programing HRD control */
2507 if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated)
2508 intel_hcpe_hrd_context_init(encode_state, encoder_context);
2512 /* HEVC interface API for encoder */
/*
 * Top-level per-frame HEVC encode entry: initialize the HCP encoder state,
 * bind the per-frame input/reference/coded buffers, program and kick the
 * BCS pipeline, then (CBR only) measure the produced bits and run the BRC
 * post-pack step.  A frame accepted by BRC updates the HRD frame counter;
 * an unrepairable HRD violation is logged once and the frame is kept.
 * Returns VA_STATUS_SUCCESS (lines between the excerpted ones presumably
 * implement the re-encode loop for repairable violations — confirm against
 * the full file).
 */
2515 gen9_hcpe_hevc_encode_picture(VADriverContextP ctx,
2516 struct encode_state *encode_state,
2517 struct intel_encoder_context *encoder_context)
2519 struct gen9_hcpe_context *hcpe_context = encoder_context->mfc_context;
2520 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2521 int current_frame_bits_size;
2525 gen9_hcpe_init(ctx, encode_state, encoder_context);
2526 intel_hcpe_hevc_prepare(ctx, encode_state, encoder_context);
2527 /*Programing bcs pipeline*/
2528 gen9_hcpe_hevc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
2529 gen9_hcpe_run(ctx, encode_state, encoder_context);
2530 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
/* FIX: the address-of operator had been corrupted into the mojibake
 * "¤t_frame_bits_size" (an encoding round-trip ate "&curr");
 * gen9_hcpe_stop writes the frame's bit count through this pointer. */
2531 gen9_hcpe_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
2532 sts = intel_hcpe_brc_postpack(encode_state, hcpe_context, current_frame_bits_size);
2533 if (sts == BRC_NO_HRD_VIOLATION) {
2534 intel_hcpe_hrd_context_update(encode_state, hcpe_context);
2536 } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* Unrepairable: QP already at its limit.  Warn once per stream. */
2537 if (!hcpe_context->hrd.violation_noted) {
2538 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
2539 hcpe_context->hrd.violation_noted = 1;
2541 return VA_STATUS_SUCCESS;
2548 return VA_STATUS_SUCCESS;
/*
 * Release every buffer object owned by the HCP encoder context: row-store
 * and tile buffers for deblocking/metadata/SAO, the source picture,
 * collocated-MV buffers, reference surfaces, the indirect CU/PAK-BSE
 * objects, the batchbuffer surfaces, and the aux batchbuffer itself.
 * Each pointer is NULLed after unreference so a second destroy is safe.
 */
2552 gen9_hcpe_context_destroy(void *context)
2554 struct gen9_hcpe_context *hcpe_context = context;
2557 dri_bo_unreference(hcpe_context->deblocking_filter_line_buffer.bo);
2558 hcpe_context->deblocking_filter_line_buffer.bo = NULL;
2560 dri_bo_unreference(hcpe_context->deblocking_filter_tile_line_buffer.bo);
2561 hcpe_context->deblocking_filter_tile_line_buffer.bo = NULL;
2563 dri_bo_unreference(hcpe_context->deblocking_filter_tile_column_buffer.bo);
2564 hcpe_context->deblocking_filter_tile_column_buffer.bo = NULL;
2566 dri_bo_unreference(hcpe_context->uncompressed_picture_source.bo);
2567 hcpe_context->uncompressed_picture_source.bo = NULL;
2569 dri_bo_unreference(hcpe_context->metadata_line_buffer.bo);
2570 hcpe_context->metadata_line_buffer.bo = NULL;
2572 dri_bo_unreference(hcpe_context->metadata_tile_line_buffer.bo);
2573 hcpe_context->metadata_tile_line_buffer.bo = NULL;
2575 dri_bo_unreference(hcpe_context->metadata_tile_column_buffer.bo);
2576 hcpe_context->metadata_tile_column_buffer.bo = NULL;
2578 dri_bo_unreference(hcpe_context->sao_line_buffer.bo);
2579 hcpe_context->sao_line_buffer.bo = NULL;
2581 dri_bo_unreference(hcpe_context->sao_tile_line_buffer.bo);
2582 hcpe_context->sao_tile_line_buffer.bo = NULL;
2584 dri_bo_unreference(hcpe_context->sao_tile_column_buffer.bo);
2585 hcpe_context->sao_tile_column_buffer.bo = NULL;
2587 /* mv temporal buffer */
2588 for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
2589 if (hcpe_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
2590 dri_bo_unreference(hcpe_context->current_collocated_mv_temporal_buffer[i].bo);
2591 hcpe_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
2594 for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2595 dri_bo_unreference(hcpe_context->reference_surfaces[i].bo);
2596 hcpe_context->reference_surfaces[i].bo = NULL;
2599 dri_bo_unreference(hcpe_context->hcp_indirect_cu_object.bo);
2600 hcpe_context->hcp_indirect_cu_object.bo = NULL;
2602 dri_bo_unreference(hcpe_context->hcp_indirect_pak_bse_object.bo);
2603 hcpe_context->hcp_indirect_pak_bse_object.bo = NULL;
2605 dri_bo_unreference(hcpe_context->hcp_batchbuffer_surface.bo);
2606 hcpe_context->hcp_batchbuffer_surface.bo = NULL;
2608 dri_bo_unreference(hcpe_context->aux_batchbuffer_surface.bo);
2609 hcpe_context->aux_batchbuffer_surface.bo = NULL;
/* The aux batchbuffer may already have been detached by the software
 * batchbuffer path, hence the NULL check. */
2611 if (hcpe_context->aux_batchbuffer)
2612 intel_batchbuffer_free(hcpe_context->aux_batchbuffer);
2614 hcpe_context->aux_batchbuffer = NULL;
/*
 * VA-API encode pipeline dispatch for the gen9 HCP encoder: route
 * HEVC Main / Main10 profiles to the HEVC encode path; any other profile
 * is rejected as unsupported.
 */
2619 VAStatus gen9_hcpe_pipeline(VADriverContextP ctx,
2621 struct encode_state *encode_state,
2622 struct intel_encoder_context *encoder_context)
2627 case VAProfileHEVCMain:
2628 case VAProfileHEVCMain10:
2629 vaStatus = gen9_hcpe_hevc_encode_picture(ctx, encode_state, encoder_context);
2633 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2640 Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2642 struct gen9_hcpe_context *hcpe_context = calloc(1, sizeof(struct gen9_hcpe_context));
2644 assert(hcpe_context);
2645 hcpe_context->pipe_mode_select = gen9_hcpe_pipe_mode_select;
2646 hcpe_context->set_surface_state = gen9_hcpe_surface_state;
2647 hcpe_context->ind_obj_base_addr_state = gen9_hcpe_ind_obj_base_addr_state;
2648 hcpe_context->pic_state = gen9_hcpe_hevc_pic_state;
2649 hcpe_context->qm_state = gen9_hcpe_hevc_qm_state;
2650 hcpe_context->fqm_state = gen9_hcpe_hevc_fqm_state;
2651 hcpe_context->insert_object = gen9_hcpe_hevc_insert_object;
2652 hcpe_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2654 encoder_context->mfc_context = hcpe_context;
2655 encoder_context->mfc_context_destroy = gen9_hcpe_context_destroy;
2656 encoder_context->mfc_pipeline = gen9_hcpe_pipeline;
2657 encoder_context->mfc_brc_prepare = intel_hcpe_brc_prepare;
2659 hevc_gen_default_iq_matrix_encoder(&hcpe_context->iq_matrix_hevc);